diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index a7c47a87..4d0fac0f 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -1,938 +1,871 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden <shawn@git.icu> * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. * * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, * and Cryptogams by Andy Polyakov, and if made part of a release of either * or both projects, is thereafter dual-licensed under the license said project * is released under. 
*/ #include <config.h> #include "rijndael-internal.h" #include "cipher-internal.h" #include "bufhelp.h" #ifdef USE_PPC_CRYPTO #include <altivec.h> typedef vector unsigned char block; typedef union { u32 data32[4]; } __attribute__((packed, aligned(1), may_alias)) u128_t; #define ALWAYS_INLINE inline __attribute__((always_inline)) #define NO_INLINE __attribute__((noinline)) #define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) #define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION #define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE #define ALIGNED_LOAD(in_ptr) \ (vec_aligned_ld (0, (const unsigned char *)(in_ptr))) #define ALIGNED_STORE(out_ptr, vec) \ (vec_aligned_st ((vec), 0, (unsigned char *)(out_ptr))) #define VEC_LOAD_BE(in_ptr, bige_const) \ (vec_load_be (0, (const unsigned char *)(in_ptr), bige_const)) #define VEC_STORE_BE(out_ptr, vec, bige_const) \ (vec_store_be ((vec), 0, (unsigned char *)(out_ptr), bige_const)) +/* Declares the two round-key variables that PRELOAD_ROUND_KEYS fills and that AES_ENCRYPT / AES_DECRYPT read. */ +#define ROUND_KEY_VARIABLES \ + block rkey0, rkeylast + +/* Loads round key 0 and round key NROUNDS from the array named rk, which the caller must have in scope together with ROUND_KEY_VARIABLES. */ +#define PRELOAD_ROUND_KEYS(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD(&rk[0]); \ + rkeylast = ALIGNED_LOAD(&rk[nrounds]); \ + } while (0) + + +/* Encrypts BLK in place: XOR with rkey0, rounds 1..9 unconditionally, rounds 10..11 when NROUNDS >= 12, rounds 12..13 when NROUNDS > 12, then the last round with rkeylast. Needs rk, rkey0 and rkeylast in scope; the round count is taken only from the NROUNDS argument. */ +#define AES_ENCRYPT(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[1])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[2])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[3])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[4])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[5])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[6])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[7])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[8])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + if ((nrounds) >= 12) \ + { \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[10])); \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + if ((nrounds) > 12) \ + { \ + blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[12])); \ + blk = vec_cipher_be (blk, 
ALIGNED_LOAD(&rk[13])); \ + } \ + } \ + blk = vec_cipherlast_be (blk, rkeylast); \ + } while (0) + + +/* Decrypts BLK in place with the same round structure as AES_ENCRYPT, using the inverse-cipher primitives. Needs rk, rkey0 and rkeylast in scope; the round count is taken only from the NROUNDS argument. */ +#define AES_DECRYPT(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[1])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[2])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[3])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[4])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[5])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[6])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[7])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[8])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + if ((nrounds) >= 12) \ + { \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[10])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + if ((nrounds) > 12) \ + { \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[12])); \ + blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + } \ + } \ + blk = vec_ncipherlast_be (blk, rkeylast); \ + } while (0) + + static const block vec_bswap32_const = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; static ASM_FUNC_ATTR_INLINE block vec_aligned_ld(unsigned long offset, const unsigned char *ptr) { #ifndef WORDS_BIGENDIAN block vec; __asm__ ("lvx %0,%1,%2\n\t" : "=v" (vec) : "r" (offset), "r" ((uintptr_t)ptr) : "memory"); return vec; #else return vec_vsx_ld (offset, ptr); #endif } static ASM_FUNC_ATTR_INLINE block vec_load_be_const(void) { #ifndef WORDS_BIGENDIAN return ~ALIGNED_LOAD(&vec_bswap32_const); #else static const block vec_dummy = { 0 }; return vec_dummy; #endif } static ASM_FUNC_ATTR_INLINE block vec_load_be(unsigned long offset, const unsigned char *ptr, block be_bswap_const) { #ifndef WORDS_BIGENDIAN block vec; /* GCC vec_vsx_ld is generating two instructions on little-endian. Use * lxvw4x directly instead. 
*/ __asm__ ("lxvw4x %x0,%1,%2\n\t" : "=wa" (vec) : "r" (offset), "r" ((uintptr_t)ptr) : "memory"); __asm__ ("vperm %0,%1,%1,%2\n\t" : "=v" (vec) : "v" (vec), "v" (be_bswap_const)); return vec; #else (void)be_bswap_const; return vec_vsx_ld (offset, ptr); #endif } static ASM_FUNC_ATTR_INLINE void vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) { #ifndef WORDS_BIGENDIAN __asm__ ("stvx %0,%1,%2\n\t" : : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) : "memory"); #else vec_vsx_st (vec, offset, ptr); #endif } static ASM_FUNC_ATTR_INLINE void vec_store_be(block vec, unsigned long offset, unsigned char *ptr, block be_bswap_const) { #ifndef WORDS_BIGENDIAN /* GCC vec_vsx_st is generating two instructions on little-endian. Use * stxvw4x directly instead. */ __asm__ ("vperm %0,%1,%1,%2\n\t" : "=v" (vec) : "v" (vec), "v" (be_bswap_const)); __asm__ ("stxvw4x %x0,%1,%2\n\t" : : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) : "memory"); #else (void)be_bswap_const; vec_vsx_st (vec, offset, ptr); #endif } static ASM_FUNC_ATTR_INLINE u32 _gcry_aes_sbox4_ppc8(u32 fourbytes) { union { PROPERLY_ALIGNED_TYPE dummy; block data_vec; u32 data32[4]; } u; u.data32[0] = fourbytes; u.data_vec = vec_sbox_be(u.data_vec); return u.data32[0]; } void _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) { const block bige_const = vec_load_be_const(); union { PROPERLY_ALIGNED_TYPE dummy; byte data[MAXKC][4]; u32 data32[MAXKC]; } tkk[2]; unsigned int rounds = ctx->rounds; int KC = rounds - 6; unsigned int keylen = KC * 4; u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; unsigned int i, r, t; byte rcon = 1; int j; #define k tkk[0].data #define k_u32 tkk[0].data32 #define tk tkk[1].data #define tk_u32 tkk[1].data32 #define W (ctx->keyschenc) #define W_u32 (ctx->keyschenc32) for (i = 0; i < keylen; i++) { k[i >> 2][i & 3] = key[i]; } for (j = KC-1; j >= 0; j--) { tk_u32[j] = k_u32[j]; } r = 0; t = 0; /* Copy values into round key array. 
*/ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } while (r < rounds + 1) { tk_u32[0] ^= le_bswap32( _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon); if (KC != 8) { for (j = 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } else { for (j = 1; j < KC/2; j++) { tk_u32[j] ^= tk_u32[j-1]; } tk_u32[KC/2] ^= le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1]))); for (j = KC/2 + 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } /* Copy values into round key array. */ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b); } /* Store in big-endian order. */ for (r = 0; r <= rounds; r++) { #ifndef WORDS_BIGENDIAN VEC_STORE_BE(&ekey[r], ALIGNED_LOAD(&ekey[r]), bige_const); #else block rvec = ALIGNED_LOAD(&ekey[r]); ALIGNED_STORE(&ekey[r], vec_perm(rvec, rvec, vec_bswap32_const)); (void)bige_const; #endif } #undef W #undef tk #undef k #undef W_u32 #undef tk_u32 #undef k_u32 wipememory(&tkk, sizeof(tkk)); } /* Make a decryption key from an encryption key. 
*/ -void -_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) +/* Copies the encryption round keys into keyschdec in reverse order: dkey[r] = ekey[rounds - r] for r = 0..rounds. */ static ASM_FUNC_ATTR_INLINE void +aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; u128_t *dkey = (u128_t *)(void *)ctx->keyschdec; int rounds = ctx->rounds; int rr; int r; r = 0; rr = rounds; for (r = 0, rr = rounds; r <= rounds; r++, rr--) { ALIGNED_STORE(&dkey[r], ALIGNED_LOAD(&ekey[rr])); } } -static ASM_FUNC_ATTR_INLINE block -aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx, block a) +/* Exported entry point; forwards to the inlinable aes_ppc8_prepare_decryption(). */ void +_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { - u128_t *rk = (u128_t *)ctx->keyschenc; - int rounds = ctx->rounds; - int r; - -#define DO_ROUND(r) (a = vec_cipher_be (a, ALIGNED_LOAD (&rk[r]))) - - a = ALIGNED_LOAD(&rk[0]) ^ a; - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - r = 10; - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - r = 12; - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - r = 14; - } - } - a = vec_cipherlast_be(a, ALIGNED_LOAD(&rk[r])); - -#undef DO_ROUND - - return a; + aes_ppc8_prepare_decryption (ctx); } -static ASM_FUNC_ATTR_INLINE block -aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx, block a) +/* Encrypts one block read from IN into OUT using the keyschenc round keys; always returns 0. */ unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) { - u128_t *rk = (u128_t *)ctx->keyschdec; + const block bige_const = vec_load_be_const(); + const u128_t *rk = (u128_t *)&ctx->keyschenc; int rounds = ctx->rounds; - int r; - -#define DO_ROUND(r) (a = vec_ncipher_be (a, ALIGNED_LOAD (&rk[r]))) - - a = ALIGNED_LOAD(&rk[0]) ^ a; - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - r = 10; - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - r = 12; - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - r = 14; - } - } - a = vec_ncipherlast_be(a, 
ALIGNED_LOAD(&rk[r])); + ROUND_KEY_VARIABLES; + block b; -#undef DO_ROUND + b = VEC_LOAD_BE (in, bige_const); - return a; -} + PRELOAD_ROUND_KEYS (rounds); - -unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, - unsigned char *b, - const unsigned char *a) -{ - const block bige_const = vec_load_be_const(); - block sa; - - sa = VEC_LOAD_BE (a, bige_const); - sa = aes_ppc8_encrypt_altivec (ctx, sa); - VEC_STORE_BE (b, sa, bige_const); + AES_ENCRYPT (b, rounds); + VEC_STORE_BE (out, b, bige_const); return 0; /* does not use stack */ } unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, - unsigned char *b, - const unsigned char *a) + unsigned char *out, + const unsigned char *in) { const block bige_const = vec_load_be_const(); - block sa; + /* Round keys are read from keyschdec as-is; this function does not build that schedule. */ const u128_t *rk = (u128_t *)&ctx->keyschdec; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b; + + b = VEC_LOAD_BE (in, bige_const); - sa = VEC_LOAD_BE (a, bige_const); - sa = aes_ppc8_decrypt_altivec (ctx, sa); - VEC_STORE_BE (b, sa, bige_const); + PRELOAD_ROUND_KEYS (rounds); + + AES_DECRYPT (b, rounds); + VEC_STORE_BE (out, b, bige_const); return 0; /* does not use stack */ } -#if 0 size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt) + const void *inbuf_arg, size_t nblocks, + int encrypt) { + /* OCB bulk processing of NBLOCKS blocks: the running offset is loaded from c->u_iv.iv and the checksum from c->u_ctr.ctr, and both are stored back together with data_nblocks before the function returns 0. */ const block bige_const = vec_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; - unsigned char *outbuf = outbuf_arg; - const unsigned char *inbuf = inbuf_arg; - block *in = (block*)inbuf; - block *out = (block*)outbuf; - uintptr_t zero = 0; - int r; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; + u64 data_nblocks = c->u_mode.ocb.data_nblocks; + block l0, l1, l2, l; + block b0, b1, b2, b3, b4, b5, b6, b7, b; + block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; + block rkey; + block ctr, iv; + ROUND_KEY_VARIABLES; + + iv = VEC_LOAD_BE (c->u_iv.iv, 
bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, bige_const); + + /* L[0..2] are preloaded for the unrolled paths; only the last block of each 4- or 8-block group fetches its L through ocb_get_l(). */ l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); if (encrypt) { - const int unroll = 8; - block unalignedprev, ctr, iv; + const u128_t *rk = (u128_t *)&ctx->keyschenc; - if (((uintptr_t)inbuf % 16) != 0) + PRELOAD_ROUND_KEYS (rounds); + + /* Single-block steps until data_nblocks is a multiple of 8, so the 8-block loop below starts on such a block count. */ for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - unalignedprev = vec_ld(0, in++); - } + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); + b = VEC_LOAD_BE (in, bige_const); - iv = vec_ld (0, (block*)&c->u_iv.iv); - ctr = vec_ld (0, (block*)&c->u_ctr.ctr); + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + b ^= iv; - for ( ;nblocks >= unroll; nblocks -= unroll) - { - u64 i = c->u_mode.ocb.data_nblocks + 1; - block l0, l1, l2, l3, l4, l5, l6, l7; - block b0, b1, b2, b3, b4, b5, b6, b7; - block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - const block *rk = (block*)&ctx->keyschenc; + VEC_STORE_BE (out, b, bige_const); - c->u_mode.ocb.data_nblocks += unroll; + in += 1; + out += 1; + } - iv0 = iv; - if ((uintptr_t)inbuf % 16 == 0) - { - b0 = vec_ld (0, in++); - b1 = vec_ld (0, in++); - b2 = vec_ld (0, in++); - b3 = vec_ld (0, in++); - b4 = vec_ld (0, in++); - b5 = vec_ld (0, in++); - b6 = vec_ld (0, in++); - b7 = vec_ld (0, in++); - } - else - { - block unaligned0, unaligned1, unaligned2, - unaligned3, unaligned4, unaligned5, unaligned6; - unaligned0 = vec_ld (0, in++); - unaligned1 = vec_ld (0, in++); - unaligned2 = vec_ld (0, in++); - unaligned3 = vec_ld (0, in++); - unaligned4 = vec_ld (0, in++); - unaligned5 = vec_ld (0, in++); - unaligned6 = vec_ld (0, in++); - b0 = vec_perm (unalignedprev, unaligned0, vec_lvsl (0, inbuf)); - unalignedprev = vec_ld (0, in++); - b1 = vec_perm(unaligned0, 
unaligned1, vec_lvsl (0, inbuf)); - b2 = vec_perm(unaligned1, unaligned2, vec_lvsl (0, inbuf)); - b3 = vec_perm(unaligned2, unaligned3, vec_lvsl (0, inbuf)); - b4 = vec_perm(unaligned3, unaligned4, vec_lvsl (0, inbuf)); - b5 = vec_perm(unaligned4, unaligned5, vec_lvsl (0, inbuf)); - b6 = vec_perm(unaligned5, unaligned6, vec_lvsl (0, inbuf)); - b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl (0, inbuf)); - } + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE (in + 0, bige_const); + b1 = VEC_LOAD_BE (in + 1, bige_const); + b2 = VEC_LOAD_BE (in + 2, bige_const); + b3 = VEC_LOAD_BE (in + 3, bige_const); + b4 = VEC_LOAD_BE (in + 4, bige_const); + b5 = VEC_LOAD_BE (in + 5, bige_const); + b6 = VEC_LOAD_BE (in + 6, bige_const); + b7 = VEC_LOAD_BE (in + 7, bige_const); - l0 = *(block*)ocb_get_l (c, i++); - l1 = *(block*)ocb_get_l (c, i++); - l2 = *(block*)ocb_get_l (c, i++); - l3 = *(block*)ocb_get_l (c, i++); - l4 = *(block*)ocb_get_l (c, i++); - l5 = *(block*)ocb_get_l (c, i++); - l6 = *(block*)ocb_get_l (c, i++); - l7 = *(block*)ocb_get_l (c, i++); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - iv0 ^= l0; + /* Fold round key 0 into the offsets: bN ^= ivN below then also applies the initial round-key XOR, and the last round uses rkeylast ^ rkey0 ^ ivN, in which the folded rkey0 cancels. iv is un-folded again via iv7 ^ rkey0. */ iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + b0 ^= iv0; - iv1 = iv0 ^ l1; b1 ^= iv1; - iv2 = iv1 ^ l2; b2 ^= iv2; - iv3 = iv2 ^ l3; b3 ^= iv3; - iv4 = iv3 ^ l4; b4 ^= iv4; - iv5 = iv4 ^ l5; b5 ^= iv5; - iv6 = iv5 ^ l6; b6 ^= iv6; - iv7 = iv6 ^ l7; b7 ^= iv7; - - b0 = swap_if_le (b0); - b1 = swap_if_le (b1); - b2 = swap_if_le (b2); - b3 = swap_if_le (b3); - b4 = swap_if_le (b4); - b5 = swap_if_le (b5); - b6 = swap_if_le (b6); - b7 = swap_if_le (b7); - - b0 ^= rk[0]; - b1 ^= rk[0]; - b2 ^= rk[0]; - b3 ^= rk[0]; - b4 ^= rk[0]; - b5 ^= rk[0]; - b6 ^= rk[0]; - b7 ^= rk[0]; - - for (r = 1;r < rounds;r++) - { - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" 
(b0) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b1) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b2) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b3) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b4) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b5) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b6) - :"v" (rk[r])); - __asm__ volatile ("vcipher %0, %0, %1\n\t" - :"+v" (b7) - :"v" (rk[r])); - } - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b0) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b1) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b2) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b3) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b4) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b5) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b6) - :"v" (rk[r])); - __asm__ volatile ("vcipherlast %0, %0, %1\n\t" - :"+v" (b7) - :"v" (rk[r])); - - iv = iv7; - - /* The unaligned store stxvb16x writes big-endian, - so in the unaligned case we swap the iv instead of the bytes */ - if ((uintptr_t)outbuf % 16 == 0) + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (&rk[r]); \ + b0 = vec_cipher_be (b0, rkey); \ + b1 = vec_cipher_be (b1, rkey); \ + b2 = vec_cipher_be (b2, rkey); \ + b3 = vec_cipher_be (b3, rkey); \ + b4 = vec_cipher_be (b4, rkey); \ + b5 = vec_cipher_be (b5, rkey); \ + b6 = vec_cipher_be (b6, rkey); \ + b7 = vec_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) { - vec_vsx_st (swap_if_le (b0) ^ iv0, 0, out++); - vec_vsx_st (swap_if_le (b1) ^ iv1, 0, out++); - vec_vsx_st (swap_if_le (b2) ^ 
iv2, 0, out++); - vec_vsx_st (swap_if_le (b3) ^ iv3, 0, out++); - vec_vsx_st (swap_if_le (b4) ^ iv4, 0, out++); - vec_vsx_st (swap_if_le (b5) ^ iv5, 0, out++); - vec_vsx_st (swap_if_le (b6) ^ iv6, 0, out++); - vec_vsx_st (swap_if_le (b7) ^ iv7, 0, out++); + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } } - else + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = vec_cipherlast_be (b0, rkey ^ iv0); + b1 = vec_cipherlast_be (b1, rkey ^ iv1); + b2 = vec_cipherlast_be (b2, rkey ^ iv2); + b3 = vec_cipherlast_be (b3, rkey ^ iv3); + b4 = vec_cipherlast_be (b4, rkey ^ iv4); + b5 = vec_cipherlast_be (b5, rkey ^ iv5); + b6 = vec_cipherlast_be (b6, rkey ^ iv6); + b7 = vec_cipherlast_be (b7, rkey ^ iv7); + + VEC_STORE_BE (out + 0, b0, bige_const); + VEC_STORE_BE (out + 1, b1, bige_const); + VEC_STORE_BE (out + 2, b2, bige_const); + VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out + 4, b4, bige_const); + VEC_STORE_BE (out + 5, b5, bige_const); + VEC_STORE_BE (out + 6, b6, bige_const); + VEC_STORE_BE (out + 7, b7, bige_const); + + in += 8; + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in + 0, bige_const); + b1 = VEC_LOAD_BE (in + 1, bige_const); + b2 = VEC_LOAD_BE (in + 2, bige_const); + b3 = VEC_LOAD_BE (in + 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (&rk[r]); \ + b0 = vec_cipher_be (b0, rkey); \ + b1 = vec_cipher_be (b1, rkey); \ + b2 = vec_cipher_be (b2, rkey); \ + b3 = vec_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) { - b0 ^= swap_if_le (iv0); - b1 ^= 
swap_if_le (iv1); - b2 ^= swap_if_le (iv2); - b3 ^= swap_if_le (iv3); - b4 ^= swap_if_le (iv4); - b5 ^= swap_if_le (iv5); - b6 ^= swap_if_le (iv6); - b7 ^= swap_if_le (iv7); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = vec_cipherlast_be (b0, rkey ^ iv0); + b1 = vec_cipherlast_be (b1, rkey ^ iv1); + b2 = vec_cipherlast_be (b2, rkey ^ iv2); + b3 = vec_cipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out + 0, b0, bige_const); + VEC_STORE_BE (out + 1, b1, bige_const); + VEC_STORE_BE (out + 2, b2, bige_const); + VEC_STORE_BE (out + 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; } - for ( ;nblocks; nblocks-- ) + for (; nblocks; nblocks--) { - block b; - u64 i = ++c->u_mode.ocb.data_nblocks; - const block l = *(block*)ocb_get_l (c, i); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); + b = VEC_LOAD_BE (in, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; - if ((uintptr_t)in % 16 == 0) - { - b = vec_ld (0, in++); - } - else - { - block unalignedprevprev; - unalignedprevprev = unalignedprev; - unalignedprev = vec_ld (0, in++); 
- b = vec_perm (unalignedprevprev, unalignedprev, vec_lvsl (0, inbuf)); - } - /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ b ^= iv; - b = swap_if_le (b); - b = _gcry_aes_ppc8_encrypt_altivec (ctx, b); - if ((uintptr_t)out % 16 == 0) - { - vec_vsx_st (swap_if_le (b) ^ iv, 0, out++); - } - else - { - b ^= swap_if_le (iv); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - : - : "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); - } - } + AES_ENCRYPT (b, rounds); + b ^= iv; - /* We want to store iv and ctr big-endian and the unaligned - store stxvb16x stores them little endian, so we have to swap them. */ - iv = swap_if_le (iv); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); - ctr = swap_if_le (ctr); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); + VEC_STORE_BE (out, b, bige_const); + + in += 1; + out += 1; + } } else { - const int unroll = 8; - block unalignedprev, ctr, iv; - if (((uintptr_t)inbuf % 16) != 0) + const u128_t *rk = (u128_t *)&ctx->keyschdec; + + /* Build the decryption key schedule on first use. */ if (!ctx->decryption_prepared) { - unalignedprev = vec_ld (0, in++); + aes_ppc8_prepare_decryption (ctx); + ctx->decryption_prepared = 1; } - iv = vec_ld (0, (block*)&c->u_iv.iv); - ctr = vec_ld (0, (block*)&c->u_ctr.ctr); + PRELOAD_ROUND_KEYS (rounds); - for ( ;nblocks >= unroll; nblocks -= unroll) + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - u64 i = c->u_mode.ocb.data_nblocks + 1; - block l0, l1, l2, l3, l4, l5, l6, l7; - block b0, b1, b2, b3, b4, b5, b6, b7; - block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - const block *rk = (block*)&ctx->keyschdec; + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); + b = VEC_LOAD_BE (in, bige_const); - c->u_mode.ocb.data_nblocks += unroll; + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + 
AES_DECRYPT (b, rounds); + b ^= iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; - iv0 = iv; - if ((uintptr_t)inbuf % 16 == 0) - { - b0 = vec_ld (0, in++); - b1 = vec_ld (0, in++); - b2 = vec_ld (0, in++); - b3 = vec_ld (0, in++); - b4 = vec_ld (0, in++); - b5 = vec_ld (0, in++); - b6 = vec_ld (0, in++); - b7 = vec_ld (0, in++); - } - else - { - block unaligned0, unaligned1, unaligned2, - unaligned3, unaligned4, unaligned5, unaligned6; - unaligned0 = vec_ld (0, in++); - unaligned1 = vec_ld (0, in++); - unaligned2 = vec_ld (0, in++); - unaligned3 = vec_ld (0, in++); - unaligned4 = vec_ld (0, in++); - unaligned5 = vec_ld (0, in++); - unaligned6 = vec_ld (0, in++); - b0 = vec_perm (unalignedprev, unaligned0, vec_lvsl (0, inbuf)); - unalignedprev = vec_ld (0, in++); - b1 = vec_perm (unaligned0, unaligned1, vec_lvsl (0, inbuf)); - b2 = vec_perm (unaligned1, unaligned2, vec_lvsl (0, inbuf)); - b3 = vec_perm (unaligned2, unaligned3, vec_lvsl (0, inbuf)); - b4 = vec_perm (unaligned3, unaligned4, vec_lvsl (0, inbuf)); - b5 = vec_perm (unaligned4, unaligned5, vec_lvsl (0, inbuf)); - b6 = vec_perm (unaligned5, unaligned6, vec_lvsl (0, inbuf)); - b7 = vec_perm (unaligned6, unalignedprev, vec_lvsl (0, inbuf)); - } + VEC_STORE_BE (out, b, bige_const); - l0 = *(block*)ocb_get_l (c, i++); - l1 = *(block*)ocb_get_l (c, i++); - l2 = *(block*)ocb_get_l (c, i++); - l3 = *(block*)ocb_get_l (c, i++); - l4 = *(block*)ocb_get_l (c, i++); - l5 = *(block*)ocb_get_l (c, i++); - l6 = *(block*)ocb_get_l (c, i++); - l7 = *(block*)ocb_get_l (c, i++); + in += 1; + out += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE (in + 0, bige_const); + b1 = VEC_LOAD_BE (in + 1, bige_const); + b2 = VEC_LOAD_BE (in + 2, bige_const); + b3 = VEC_LOAD_BE (in + 3, bige_const); + b4 = VEC_LOAD_BE (in + 4, bige_const); + b5 = VEC_LOAD_BE (in + 5, bige_const); + b6 = VEC_LOAD_BE (in + 6, bige_const); + b7 = VEC_LOAD_BE (in + 7, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, 
data_nblocks += 8), bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; - iv0 ^= l0; b0 ^= iv0; - iv1 = iv0 ^ l1; b1 ^= iv1; - iv2 = iv1 ^ l2; b2 ^= iv2; - iv3 = iv2 ^ l3; b3 ^= iv3; - iv4 = iv3 ^ l4; b4 ^= iv4; - iv5 = iv4 ^ l5; b5 ^= iv5; - iv6 = iv5 ^ l6; b6 ^= iv6; - iv7 = iv6 ^ l7; b7 ^= iv7; - - b0 = swap_if_le (b0); - b1 = swap_if_le (b1); - b2 = swap_if_le (b2); - b3 = swap_if_le (b3); - b4 = swap_if_le (b4); - b5 = swap_if_le (b5); - b6 = swap_if_le (b6); - b7 = swap_if_le (b7); - - b0 ^= rk[0]; - b1 ^= rk[0]; - b2 ^= rk[0]; - b3 ^= rk[0]; - b4 ^= rk[0]; - b5 ^= rk[0]; - b6 ^= rk[0]; - b7 ^= rk[0]; - - for (r = 1;r < rounds;r++) + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (&rk[r]); \ + b0 = vec_ncipher_be (b0, rkey); \ + b1 = vec_ncipher_be (b1, rkey); \ + b2 = vec_ncipher_be (b2, rkey); \ + b3 = vec_ncipher_be (b3, rkey); \ + b4 = vec_ncipher_be (b4, rkey); \ + b5 = vec_ncipher_be (b5, rkey); \ + b6 = vec_ncipher_be (b6, rkey); \ + b7 = vec_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) { - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b0) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b1) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b2) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b3) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b4) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b5) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b6) - :"v" (rk[r])); - __asm__ volatile ("vncipher %0, %0, %1\n\t" - :"+v" (b7) - :"v" (rk[r])); + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + 
DO_ROUND(13); + } } - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b0) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b1) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b2) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b3) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b4) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b5) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b6) - :"v" (rk[r])); - __asm__ volatile ("vncipherlast %0, %0, %1\n\t" - :"+v" (b7) - :"v" (rk[r])); - - iv = iv7; - - b0 = swap_if_le (b0) ^ iv0; - b1 = swap_if_le (b1) ^ iv1; - b2 = swap_if_le (b2) ^ iv2; - b3 = swap_if_le (b3) ^ iv3; - b4 = swap_if_le (b4) ^ iv4; - b5 = swap_if_le (b5) ^ iv5; - b6 = swap_if_le (b6) ^ iv6; - b7 = swap_if_le (b7) ^ iv7; + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = vec_ncipherlast_be (b0, rkey ^ iv0); + b1 = vec_ncipherlast_be (b1, rkey ^ iv1); + b2 = vec_ncipherlast_be (b2, rkey ^ iv2); + b3 = vec_ncipherlast_be (b3, rkey ^ iv3); + b4 = vec_ncipherlast_be (b4, rkey ^ iv4); + b5 = vec_ncipherlast_be (b5, rkey ^ iv5); + b6 = vec_ncipherlast_be (b6, rkey ^ iv6); + b7 = vec_ncipherlast_be (b7, rkey ^ iv7); + + VEC_STORE_BE (out + 0, b0, bige_const); + VEC_STORE_BE (out + 1, b1, bige_const); + VEC_STORE_BE (out + 2, b2, bige_const); + VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out + 4, b4, bige_const); + VEC_STORE_BE (out + 5, b5, bige_const); + VEC_STORE_BE (out + 6, b6, bige_const); + VEC_STORE_BE (out + 7, b7, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - /* The unaligned store stxvb16x writes big-endian */ - if ((uintptr_t)outbuf % 16 == 0) - { - vec_vsx_st (b0, 0, out++); - vec_vsx_st (b1, 0, out++); - vec_vsx_st (b2, 0, out++); - vec_vsx_st (b3, 0, out++); - vec_vsx_st (b4, 0, out++); - vec_vsx_st (b5, 0, out++); - vec_vsx_st (b6, 0, out++); - 
vec_vsx_st (b7, 0, out++); - } - else + in += 8; + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in + 0, bige_const); + b1 = VEC_LOAD_BE (in + 1, bige_const); + b2 = VEC_LOAD_BE (in + 2, bige_const); + b3 = VEC_LOAD_BE (in + 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (&rk[r]); \ + b0 = vec_ncipher_be (b0, rkey); \ + b1 = vec_ncipher_be (b1, rkey); \ + b2 = vec_ncipher_be (b2, rkey); \ + b3 = vec_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) { - b0 = swap_if_le (b0); - b1 = swap_if_le (b1); - b2 = swap_if_le (b2); - b3 = swap_if_le (b3); - b4 = swap_if_le (b4); - b5 = swap_if_le (b5); - b6 = swap_if_le (b6); - b7 = swap_if_le (b7); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); - __asm__ ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = vec_ncipherlast_be (b0, rkey ^ 
iv0); + b1 = vec_ncipherlast_be (b1, rkey ^ iv1); + b2 = vec_ncipherlast_be (b2, rkey ^ iv2); + b3 = vec_ncipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out + 0, b0, bige_const); + VEC_STORE_BE (out + 1, b1, bige_const); + VEC_STORE_BE (out + 2, b2, bige_const); + VEC_STORE_BE (out + 3, b3, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + in += 4; + out += 4; + nblocks -= 4; } - for ( ;nblocks; nblocks-- ) + for (; nblocks; nblocks--) { - block b; - u64 i = ++c->u_mode.ocb.data_nblocks; - const block l = *(block*)ocb_get_l (c, i); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); + b = VEC_LOAD_BE (in, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; - if ((uintptr_t)in % 16 == 0) - { - b = vec_ld (0, in++); - } - else - { - block unalignedprevprev; - unalignedprevprev = unalignedprev; - unalignedprev = vec_ld (0, in++); - b = vec_perm (unalignedprevprev, unalignedprev, vec_lvsl (0, inbuf)); - } - - /* Checksum_i = Checksum_{i-1} xor P_i */ - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + AES_DECRYPT (b, rounds); b ^= iv; - b = swap_if_le (b); - b = _gcry_aes_ppc8_decrypt_altivec (ctx, b); - b = swap_if_le (b) ^ iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - if ((uintptr_t)out % 16 == 0) - { - vec_vsx_st (b, 0, out++); - } - else - { - b = swap_if_le (b); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - : - : "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); - } - } - /* We want to store iv and ctr big-endian and the unaligned - store stxvb16x stores them little endian, so we have to swap them. 
*/ - iv = swap_if_le (iv); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); - ctr = swap_if_le(ctr); - __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" - :: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); + VEC_STORE_BE (out, b, bige_const); + + in += 1; + out += 1; + } } + + VEC_STORE_BE (c->u_iv.iv, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, ctr, bige_const); + c->u_mode.ocb.data_nblocks = data_nblocks; + return 0; } -#endif #endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 8a27dfe0..c7bc467c 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,2149 +1,2159 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. ******************************************************************* * The code here is based on the optimized implementation taken from * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, * which carries this notice: *------------------------------------------ * rijndael-alg-fst.c v2.3 April '2000 * * Optimised ANSI C code * * authors: v1.0: Antoon Bosselaers * v2.0: Vincent Rijmen * v2.3: Paulo Barreto * * This code is placed in the public domain. 
*------------------------------------------ * * The SP800-38a document is available at: * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf * */ #include <config.h> #include <stdio.h> #include <stdlib.h> #include <string.h> /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AMD64_ASM /* AMD64 assembly implementations of AES */ extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_AMD64_ASM*/ #ifdef USE_AESNI /* AES-NI (AMD64 & i386) accelerated implementations of AES */ extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t 
_gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif #ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); 
#endif /*USE_PADLOCK*/

#ifdef USE_ARM_ASM
/* ARM assembly implementations of AES */
extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc,
                                                unsigned char *out,
                                                const unsigned char *in,
                                                int rounds,
                                                const void *encT);

extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec,
                                                unsigned char *out,
                                                const unsigned char *in,
                                                int rounds,
                                                const void *decT);
#endif /*USE_ARM_ASM*/

#ifdef USE_ARM_CE
/* ARMv8 Crypto Extension implementations of AES */
extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key);
extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx);

extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx,
                                               unsigned char *dst,
                                               const unsigned char *src);
extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx,
                                               unsigned char *dst,
                                               const unsigned char *src);

extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv,
                                        void *outbuf_arg, const void *inbuf_arg,
                                        size_t nblocks);
extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv,
                                        void *outbuf_arg, const void *inbuf_arg,
                                        size_t nblocks,
                                        int cbc_mac);
extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr,
                                        void *outbuf_arg, const void *inbuf_arg,
                                        size_t nblocks);
extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv,
                                        void *outbuf_arg, const void *inbuf_arg,
                                        size_t nblocks);
extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv,
                                        void *outbuf_arg, const void *inbuf_arg,
                                        size_t nblocks);
extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                                            const void *inbuf_arg, size_t nblocks,
                                            int encrypt);
extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
                                           const void *abuf_arg, size_t nblocks);
extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
                                          void *outbuf_arg,
                                          const void *inbuf_arg,
                                          size_t nblocks, int encrypt);
#endif /*USE_ARM_CE*/

#ifdef USE_PPC_CRYPTO
/* PowerPC Crypto implementations
of AES */ extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); +extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); #endif /*USE_PPC_CRYPTO*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); /* All the numbers. */ #include "rijndael-tables.h" /* Function prototypes. */ static const char *selftest(void); /* Prefetching for encryption/decryption tables. */ static inline void prefetch_table(const volatile byte *tab, size_t len) { size_t i; for (i = 0; len - i >= 8 * 32; i += 8 * 32) { (void)tab[i + 0 * 32]; (void)tab[i + 1 * 32]; (void)tab[i + 2 * 32]; (void)tab[i + 3 * 32]; (void)tab[i + 4 * 32]; (void)tab[i + 5 * 32]; (void)tab[i + 6 * 32]; (void)tab[i + 7 * 32]; } for (; i < len; i += 32) { (void)tab[i]; } (void)tab[len - 1]; } static void prefetch_enc(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ enc_tables.counter_head++; enc_tables.counter_tail++; /* Prefetch look-up tables to cache. */ prefetch_table((const void *)&enc_tables, sizeof(enc_tables)); } static void prefetch_dec(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. 
Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ dec_tables.counter_head++; dec_tables.counter_tail++; /* Prefetch look-up tables to cache. */ prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); } /* Perform the key setup. */ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, gcry_cipher_hd_t hd) { static int initialized = 0; static const char *selftest_failed = 0; int rounds; int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO) unsigned int hwfeatures; #endif (void)hd; /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a failed self-test won't get noticed in another thread. FIXME: We might want to have a central registry of succeeded self-tests. 
*/ if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed ); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if( keylen == 128/8 ) { rounds = 10; KC = 4; } else if ( keylen == 192/8 ) { rounds = 12; KC = 6; } else if ( keylen == 256/8 ) { rounds = 14; KC = 8; } else return GPG_ERR_INV_KEYLEN; ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO) hwfeatures = _gcry_get_hw_features (); #endif ctx->decryption_prepared = 0; #ifdef USE_PADLOCK ctx->use_padlock = 0; #endif #ifdef USE_AESNI ctx->use_aesni = 0; #endif #ifdef USE_SSSE3 ctx->use_ssse3 = 0; #endif #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif #ifdef USE_PPC_CRYPTO ctx->use_ppc_crypto = 0; #endif if (0) { ; } #ifdef USE_AESNI else if (hwfeatures & HWF_INTEL_AESNI) { ctx->encrypt_fn = _gcry_aes_aesni_encrypt; ctx->decrypt_fn = _gcry_aes_aesni_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX); ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2); if (hd) { hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc; hd->bulk.cfb_dec = _gcry_aes_aesni_cfb_dec; hd->bulk.cbc_enc = _gcry_aes_aesni_cbc_enc; hd->bulk.cbc_dec = _gcry_aes_aesni_cbc_dec; hd->bulk.ctr_enc = _gcry_aes_aesni_ctr_enc; hd->bulk.ocb_crypt = _gcry_aes_aesni_ocb_crypt; hd->bulk.ocb_auth = _gcry_aes_aesni_ocb_auth; hd->bulk.xts_crypt = _gcry_aes_aesni_xts_crypt; } } #endif #ifdef USE_PADLOCK else if (hwfeatures & HWF_PADLOCK_AES && keylen == 128/8) { ctx->encrypt_fn = _gcry_aes_padlock_encrypt; ctx->decrypt_fn = _gcry_aes_padlock_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_padlock = 1; memcpy (ctx->padlockkey, key, keylen); } #endif #ifdef USE_SSSE3 else if (hwfeatures & HWF_INTEL_SSSE3) { ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; 
ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_ssse3 = 1; if (hd) { hd->bulk.cfb_enc = _gcry_aes_ssse3_cfb_enc; hd->bulk.cfb_dec = _gcry_aes_ssse3_cfb_dec; hd->bulk.cbc_enc = _gcry_aes_ssse3_cbc_enc; hd->bulk.cbc_dec = _gcry_aes_ssse3_cbc_dec; hd->bulk.ctr_enc = _gcry_aes_ssse3_ctr_enc; hd->bulk.ocb_crypt = _gcry_aes_ssse3_ocb_crypt; hd->bulk.ocb_auth = _gcry_aes_ssse3_ocb_auth; } } #endif #ifdef USE_ARM_CE else if (hwfeatures & HWF_ARM_AES) { ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_arm_ce = 1; if (hd) { hd->bulk.cfb_enc = _gcry_aes_armv8_ce_cfb_enc; hd->bulk.cfb_dec = _gcry_aes_armv8_ce_cfb_dec; hd->bulk.cbc_enc = _gcry_aes_armv8_ce_cbc_enc; hd->bulk.cbc_dec = _gcry_aes_armv8_ce_cbc_dec; hd->bulk.ctr_enc = _gcry_aes_armv8_ce_ctr_enc; hd->bulk.ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt; hd->bulk.ocb_auth = _gcry_aes_armv8_ce_ocb_auth; hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt; } } #endif #ifdef USE_PPC_CRYPTO else if (hwfeatures & HWF_PPC_VCRYPTO) { ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_ppc_crypto = 1; if (hd) { + hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt; } } #endif else { ctx->encrypt_fn = do_encrypt; ctx->decrypt_fn = do_decrypt; ctx->prefetch_enc_fn = prefetch_enc; ctx->prefetch_dec_fn = prefetch_dec; } /* NB: We don't yet support Padlock hardware key generation. 
*/ if (0) { ; } #ifdef USE_AESNI else if (ctx->use_aesni) _gcry_aes_aesni_do_setkey (ctx, key); #endif #ifdef USE_SSSE3 else if (ctx->use_ssse3) _gcry_aes_ssse3_do_setkey (ctx, key); #endif #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); #endif #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) _gcry_aes_ppc8_setkey (ctx, key); #endif else { const byte *sbox = ((const byte *)encT) + 1; union { PROPERLY_ALIGNED_TYPE dummy; byte data[MAXKC][4]; u32 data32[MAXKC]; } tkk[2]; #define k tkk[0].data #define k_u32 tkk[0].data32 #define tk tkk[1].data #define tk_u32 tkk[1].data32 #define W (ctx->keyschenc) #define W_u32 (ctx->keyschenc32) prefetch_enc(); for (i = 0; i < keylen; i++) { k[i >> 2][i & 3] = key[i]; } for (j = KC-1; j >= 0; j--) { tk_u32[j] = k_u32[j]; } r = 0; t = 0; /* Copy values into round key array. */ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } while (r < rounds + 1) { /* While not enough round key material calculated calculate new values. */ tk[0][0] ^= sbox[tk[KC-1][1] * 4]; tk[0][1] ^= sbox[tk[KC-1][2] * 4]; tk[0][2] ^= sbox[tk[KC-1][3] * 4]; tk[0][3] ^= sbox[tk[KC-1][0] * 4]; tk[0][0] ^= rcon[rconpointer++]; if (KC != 8) { for (j = 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } else { for (j = 1; j < KC/2; j++) { tk_u32[j] ^= tk_u32[j-1]; } tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4]; tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4]; tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4]; tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4]; for (j = KC/2 + 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } /* Copy values into round key array. 
*/ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } } #undef W #undef tk #undef k #undef W_u32 #undef tk_u32 #undef k_u32 wipememory(&tkk, sizeof(tkk)); } return 0; } static gcry_err_code_t rijndael_setkey (void *context, const byte *key, const unsigned keylen, gcry_cipher_hd_t hd) { RIJNDAEL_context *ctx = context; return do_setkey (ctx, key, keylen, hd); } /* Make a decryption key from an encryption key. */ static void prepare_decryption( RIJNDAEL_context *ctx ) { int r; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_prepare_decryption (ctx); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_prepare_decryption (ctx); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_prepare_decryption (ctx); } #endif /*USE_ARM_CE*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_prepare_decryption (ctx); } #endif /*USE_ARM_CE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { _gcry_aes_ppc8_prepare_decryption (ctx); } #endif #ifdef USE_PADLOCK else if (ctx->use_padlock) { /* Padlock does not need decryption subkeys. 
*/ } #endif /*USE_PADLOCK*/ else { const byte *sbox = ((const byte *)encT) + 1; prefetch_enc(); prefetch_dec(); ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; for (r = 1; r < ctx->rounds; r++) { u32 *wi = ctx->keyschenc32[r]; u32 *wo = ctx->keyschdec32[r]; u32 wt; wt = wi[0]; wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[1]; wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[2]; wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[3]; wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); } ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; } } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Encrypt one block. A and B may be the same. 
*/
/* Generic table based single block encryption.

   The 16 byte block at A is loaded as four little-endian 32-bit words,
   combined with round key 0 and then run through the encT look-ups.
   Round 1 is written out, rounds 2 .. ROUNDS-1 are handled two per loop
   iteration (note the extra "r++" in the loop body) and the final round
   uses plain S-box bytes, which are read out of encT through SBOX.
   The result is stored little-endian at B.  Round keys come from
   CTX->keyschenc32 via the local RK macro.

   Returns 56 + 2*sizeof(int); the bulk helpers in this file pass the
   return value of the block function on to _gcry_burn_stack, so this is
   presumably the stack depth to burn.  */
static unsigned int
do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
               const unsigned char *a)
{
#define rk (ctx->keyschenc32)
  const byte *sbox = ((const byte *)encT) + 1;
  int rounds = ctx->rounds;
  int r;
  u32 sa[4];
  u32 sb[4];

  sb[0] = buf_get_le32(a + 0);
  sb[1] = buf_get_le32(a + 4);
  sb[2] = buf_get_le32(a + 8);
  sb[3] = buf_get_le32(a + 12);

  /* Initial round key addition.  */
  sa[0] = sb[0] ^ rk[0][0];
  sa[1] = sb[1] ^ rk[0][1];
  sa[2] = sb[2] ^ rk[0][2];
  sa[3] = sb[3] ^ rk[0][3];

  /* Round 1.  */
  sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
  sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
  sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
  sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
  sa[0] = rk[1][0] ^ sb[0];

  sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
  sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
  sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
  sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
  sa[1] = rk[1][1] ^ sb[1];

  sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
  sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
  sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
  sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
  sa[2] = rk[1][2] ^ sb[2];

  sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
  sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
  sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
  sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
  sa[3] = rk[1][3] ^ sb[3];

  /* Two rounds per iteration.  */
  for (r = 2; r < rounds; r++)
    {
      sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];

      r++;

      sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];
    }

  /* Last round is special. */

  sb[0] = (sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8);
  sb[3] = (sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8);
  sb[2] = (sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8);
  sb[1] = (sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8);
  sa[0] = rk[r][0] ^ sb[0];

  sb[1] ^= (sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8);
  sa[0] ^= (sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8);
  sb[3] ^= (sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8);
  sb[2] ^= (sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8);
  sa[1] = rk[r][1] ^ sb[1];

  sb[2] ^= (sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8);
  sa[1] ^= (sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8);
  sa[0] ^= (sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8);
  sb[3] ^= (sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8);
  sa[2] = rk[r][2] ^ sb[2];

  sb[3] ^= (sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8);
  sa[2] ^= (sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8);
  sa[1] ^= (sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8);
  sa[0] ^= (sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8);
  sa[3] = rk[r][3] ^ sb[3];

  buf_put_le32(b + 0, sa[0]);
  buf_put_le32(b + 4, sa[1]);
  buf_put_le32(b + 8, sa[2]);
  buf_put_le32(b + 12, sa[3]);
#undef rk

  return (56 + 2*sizeof(int));
}
#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/


/* Encrypt the block at AX into BX with the non-accelerated code: the
   AMD64 or ARM assembly routine when one is compiled in, otherwise the
   C implementation do_encrypt_fn.  Returns the value of the routine
   that was called.  */
static unsigned int
do_encrypt (const RIJNDAEL_context *ctx,
            unsigned char *bx, const unsigned char *ax)
{
#ifdef USE_AMD64_ASM
  return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
                                       enc_tables.T);
#elif defined(USE_ARM_ASM)
  return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
                                     enc_tables.T);
#else
  return do_encrypt_fn (ctx, bx, ax);
#endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/
}


/* Single block encryption entry point: runs the prefetch hook of CONTEXT
   if one is set and then calls the encrypt function stored in it.
   A is the input block and B the output block.  */
static unsigned int
rijndael_encrypt (void *context, byte *b, const byte *a)
{
  RIJNDAEL_context *ctx = context;

  if (ctx->prefetch_enc_fn)
    ctx->prefetch_enc_fn();

  return ctx->encrypt_fn (ctx, b, a);
}


/* Bulk encryption of complete blocks in CFB mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.
This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. */ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. */ cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. 
*/ void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char *last_iv; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); return; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); last_iv = iv; for ( ;nblocks; nblocks-- ) { cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); last_iv = outbuf; inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } if (last_iv != iv) cipher_block_cpy (iv, last_iv, BLOCKSIZE); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CTR mode. Caller needs to make sure that CTR is aligned on a 16 byte boundary if AESNI; the minimum alignment is for an u32. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOCKSIZE. 
*/ void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); return; } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. */ cipher_block_add(ctr, 1, BLOCKSIZE); } wipememory(&tmp, sizeof(tmp)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Decrypt one block. A and B may be the same. 
*/
/* Generic table based single block decryption.

   The 16 byte block at A is loaded as four little-endian 32-bit words
   and combined with round key ROUNDS.  The loop then walks the round
   keys downwards, two rounds per iteration (note the extra "r--" in the
   loop body), followed by the round using round key 1.  The final round
   uses inv_sbox bytes and round key 0.  The result is stored
   little-endian at B.  Round keys come from CTX->keyschdec32 via the
   local RK macro.

   Returns 56 + 2*sizeof(int); presumably the stack depth to burn, as
   the bulk helpers in this file do with block function results --
   confirm against the callers of the decrypt function.  */
static unsigned int
do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
               const unsigned char *a)
{
#define rk (ctx->keyschdec32)
  int rounds = ctx->rounds;
  int r;
  u32 sa[4];
  u32 sb[4];

  sb[0] = buf_get_le32(a + 0);
  sb[1] = buf_get_le32(a + 4);
  sb[2] = buf_get_le32(a + 8);
  sb[3] = buf_get_le32(a + 12);

  /* Initial round key addition, using the last round key.  */
  sa[0] = sb[0] ^ rk[rounds][0];
  sa[1] = sb[1] ^ rk[rounds][1];
  sa[2] = sb[2] ^ rk[rounds][2];
  sa[3] = sb[3] ^ rk[rounds][3];

  /* Two rounds per iteration, round keys ROUNDS-1 down to 2.  */
  for (r = rounds - 1; r > 1; r--)
    {
      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];

      r--;

      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];
    }

  /* Round using round key 1.  */
  sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
  sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
  sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
  sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
  sa[0] = rk[1][0] ^ sb[0];

  sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
  sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
  sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
  sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
  sa[1] = rk[1][1] ^ sb[1];

  sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
  sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
  sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
  sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
  sa[2] = rk[1][2] ^ sb[2];

  sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
  sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
  sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
  sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
  sa[3] = rk[1][3] ^ sb[3];

  /* Last round is special. */
  sb[0] = inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8);
  sb[1] = inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8);
  sb[2] = inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8);
  sb[3] = inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8);
  sa[0] = sb[0] ^ rk[0][0];

  sb[1] ^= inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8);
  sb[2] ^= inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8);
  sb[3] ^= inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8);
  sa[0] ^= inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8);
  sa[1] = sb[1] ^ rk[0][1];

  sb[2] ^= inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8);
  sb[3] ^= inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8);
  sa[0] ^= inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8);
  sa[1] ^= inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8);
  sa[2] = sb[2] ^ rk[0][2];

  sb[3] ^= inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8);
  sa[0] ^= inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8);
  sa[1] ^= inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8);
  sa[2] ^= inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8);
  sa[3] = sb[3] ^ rk[0][3];

  buf_put_le32(b + 0, sa[0]);
  buf_put_le32(b + 4, sa[1]);
  buf_put_le32(b + 8, sa[2]);
  buf_put_le32(b + 12, sa[3]);
#undef rk

  return (56+2*sizeof(int));
}
#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/


/* Decrypt one block.  AX and BX may be the same.
*/ static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, dec_tables.T); #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, dec_tables.T); #else return do_decrypt_fn (ctx, bx, ax); #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ } static inline void check_decryption_preparation (RIJNDAEL_context *ctx) { if ( !ctx->decryption_prepared ) { prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } } static unsigned int rijndael_decrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); return ctx->decrypt_fn (ctx, b, a); } /* Bulk decryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. 
*/ void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { burn_depth = encrypt_fn (ctx, iv, iv); cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk decryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. 
*/ void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); return; } #endif /*USE_ARM_CE*/ else { unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ burn_depth = decrypt_fn (ctx, savebuf, inbuf); cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption/decryption of complete blocks in OCB mode. 
*/ size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { return _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { return _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO + else if (ctx->use_ppc_crypto) + { + return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + } +#endif /*USE_PPC_CRYPTO*/ else if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned 
char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk authentication of complete blocks in OCB mode. */ size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned int burn_depth = 0; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { return _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { return _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.aad_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); abuf += BLOCKSIZE; } wipememory(&l_tmp, sizeof(l_tmp)); } if (burn_depth) 
_gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk encryption/decryption of complete blocks in XTS mode. */ void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t crypt_fn; u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); return; } #endif /*USE_AESNI*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); return; } #endif /*USE_ARM_CE*/ else { if (encrypt) { if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); crypt_fn = ctx->encrypt_fn; } else { check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); crypt_fn = ctx->decrypt_fn; } tweak_next_lo = buf_get_le64 (tweak + 0); tweak_next_hi = buf_get_le64 (tweak + 8); while (nblocks) { tweak_lo = tweak_next_lo; tweak_hi = tweak_next_hi; /* Xor-Encrypt/Decrypt-Xor block. */ tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo; tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi; buf_put_le64 (outbuf + 0, tmp_lo); buf_put_le64 (outbuf + 8, tmp_hi); /* Generate next tweak. 
*/ carry = -(tweak_next_hi >> 63) & 0x87; tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); tweak_next_lo = (tweak_next_lo << 1) ^ carry; burn_depth = crypt_fn (ctx, outbuf, outbuf); buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo); buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi); outbuf += GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; nblocks--; } buf_put_le64 (tweak + 0, tweak_next_lo); buf_put_le64 (tweak + 8, tweak_next_hi); } if (burn_depth) _gcry_burn_stack (burn_depth + 5 * sizeof(void *)); } /* Run the self-tests for AES 128. Returns NULL on success. */ static const char* selftest_basic_128 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; /* The test vectors are from the AES supplied ones; more or less randomly taken from ecb_tbl.txt (I=42,81,14) */ #if 1 static const unsigned char plaintext_128[16] = { 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A }; static const unsigned char key_128[16] = { 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA }; static const unsigned char ciphertext_128[16] = { 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD }; #else /* Test vectors from fips-197, appendix C. */ # warning debug test vectors in use static const unsigned char plaintext_128[16] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff }; static const unsigned char key_128[16] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the stack, we need to allocate that context on the heap. 
*/ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_128, sizeof (key_128), NULL); rijndael_encrypt (ctx, scratch, plaintext_128); if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) { xfree (ctxmem); return "AES-128 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; return NULL; } /* Run the self-tests for AES 192. Returns NULL on success. */ static const char* selftest_basic_192 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_192[16] = { 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 }; static unsigned char key_192[24] = { 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 }; static const unsigned char ciphertext_192[16] = { 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_192, sizeof(key_192), NULL); rijndael_encrypt (ctx, scratch, plaintext_192); if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) { xfree (ctxmem); return "AES-192 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) return "AES-192 test decryption failed."; return NULL; } /* Run the self-tests for AES 256. Returns NULL on success. 
*/ static const char* selftest_basic_256 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_256[16] = { 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 }; static unsigned char key_256[32] = { 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E }; static const unsigned char ciphertext_256[16] = { 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_256, sizeof(key_256), NULL); rijndael_encrypt (ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) { xfree (ctxmem); return "AES-256 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) return "AES-256 test decryption failed."; return NULL; } /* Run the self-tests for AES-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char* selftest_ctr_128 (void) { const int nblocks = 8+1; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_ctr("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cbc_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cbc("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CFB-128, tests bulk CFB decryption. Returns NULL on success. 
*/ static const char* selftest_cfb_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cfb("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cfb_dec, nblocks, blocksize, context_size); } /* Run all the self-tests and return NULL on success. This function is used for the on-the-fly self-tests. */ static const char * selftest (void) { const char *r; if ( (r = selftest_basic_128 ()) || (r = selftest_basic_192 ()) || (r = selftest_basic_256 ()) ) return r; if ( (r = selftest_ctr_128 ()) ) return r; if ( (r = selftest_cbc_128 ()) ) return r; if ( (r = selftest_cfb_128 ()) ) return r; return r; } /* SP800-38a.pdf for AES-128. */ static const char * selftest_fips_128_38a (int requested_mode) { static const struct tv { int mode; const unsigned char key[16]; const unsigned char iv[16]; struct { const unsigned char input[16]; const unsigned char output[16]; } data[4]; } tv[2] = { { GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 
0x0e, 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } } }, { GCRY_CIPHER_MODE_OFB, { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, } } }; unsigned char scratch[16]; gpg_error_t err; int tvi, idx; gcry_cipher_hd_t hdenc = NULL; gcry_cipher_hd_t hddec = NULL; #define Fail(a) do { \ _gcry_cipher_close (hdenc); \ _gcry_cipher_close (hddec); \ return a; \ } while (0) gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); for (tvi=0; tvi < DIM (tv); tvi++) if (tv[tvi].mode == requested_mode) break; if (tvi == DIM (tv)) Fail ("no test data for this mode"); err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); if (!err) err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); if (err) Fail ("set key"); err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, 
sizeof tv[tvi].iv); if (!err) err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); if (err) Fail ("set IV"); for (idx=0; idx < DIM (tv[tvi].data); idx++) { err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, tv[tvi].data[idx].input, sizeof tv[tvi].data[idx].input); if (err) Fail ("encrypt command"); if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) Fail ("encrypt mismatch"); err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, tv[tvi].data[idx].output, sizeof tv[tvi].data[idx].output); if (err) Fail ("decrypt command"); if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) Fail ("decrypt mismatch"); } #undef Fail _gcry_cipher_close (hdenc); _gcry_cipher_close (hddec); return NULL; } /* Complete selftest for AES-128 with all modes and driver code. */ static gpg_err_code_t selftest_fips_128 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "low-level"; errtxt = selftest_basic_128 (); if (errtxt) goto failed; if (extended) { what = "cfb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); if (errtxt) goto failed; what = "ofb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES128, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-192. */ static gpg_err_code_t selftest_fips_192 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_192 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES192, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-256. */ static gpg_err_code_t selftest_fips_256 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. 
*/ what = "low-level"; errtxt = selftest_basic_256 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES256, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. */ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_AES128: ec = selftest_fips_128 (extended, report); break; case GCRY_CIPHER_AES192: ec = selftest_fips_192 (extended, report); break; case GCRY_CIPHER_AES256: ec = selftest_fips_256 (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } static const char *rijndael_names[] = { "RIJNDAEL", "AES128", "AES-128", NULL }; static gcry_cipher_oid_spec_t rijndael_oids[] = { { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes = { GCRY_CIPHER_AES, {0, 1}, "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael192_names[] = { "RIJNDAEL192", "AES-192", NULL }; static gcry_cipher_oid_spec_t rijndael192_oids[] = { { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes192 = { GCRY_CIPHER_AES192, {0, 1}, "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael256_names[] = { "RIJNDAEL256", "AES-256", NULL }; static gcry_cipher_oid_spec_t rijndael256_oids[] = { { 
"2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes256 = { GCRY_CIPHER_AES256, {0, 1}, "AES256", rijndael256_names, rijndael256_oids, 16, 256, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests };