diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h index 063c5358..8a05d3c9 100644 --- a/cipher/rijndael-ppc-functions.h +++ b/cipher/rijndael-ppc-functions.h @@ -1,2037 +1,2294 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden * Copyright (C) 2019-2020, 2022 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, * and Cryptogams by Andy Polyakov, and if made part of a release of either * or both projects, is thereafter dual-licensed under the license said project * is released under. */ unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschenc; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_ENCRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschdec; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_DECRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } void CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES_ALL; block key0_xor_keylast; block iv, outiv; iv = VEC_LOAD_BE (iv_arg, 0, bige_const); outiv = iv; PRELOAD_ROUND_KEYS_ALL (rounds); key0_xor_keylast = rkey0 ^ rkeylast; iv ^= rkey0; for (; nblocks; nblocks--) { rkeylast = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const); iv = asm_cipher_be (iv, rkey1); iv = asm_cipher_be (iv, rkey2); iv = asm_cipher_be (iv, rkey3); iv = asm_cipher_be (iv, rkey4); iv = asm_cipher_be (iv, rkey5); iv = asm_cipher_be (iv, rkey6); iv = asm_cipher_be (iv, rkey7); iv = asm_cipher_be (iv, rkey8); iv = asm_cipher_be (iv, rkey9); if (rounds >= 12) { iv = asm_cipher_be (iv, rkey10); iv = asm_cipher_be (iv, rkey11); if (rounds > 12) { iv = asm_cipher_be (iv, rkey12); iv = asm_cipher_be (iv, rkey13); } } iv = asm_cipherlast_be (iv, rkeylast); outiv = rkey0 ^ iv; VEC_STORE_BE (out++, 0, outiv, bige_const); } VEC_STORE_BE (iv_arg, 0, outiv, bige_const); } + +void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + const block bige_const = asm_load_be_const(); + 
RIJNDAEL_context *ctx = context; + const u128_t *rk = encrypt ? (u128_t *)&ctx->keyschenc + : (u128_t *)&ctx->keyschdec; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + + if (!encrypt && !ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + b0 = asm_xor (rkey0, b0); + b1 = asm_xor (rkey0, b1); + b4 = VEC_LOAD_BE (in, 4, bige_const); + b5 = VEC_LOAD_BE (in, 5, bige_const); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b6 = VEC_LOAD_BE (in, 6, bige_const); + b7 = VEC_LOAD_BE (in, 7, bige_const); + in += 8; + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + + if (encrypt) + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, rkeylast); + b1 = asm_cipherlast_be (b1, rkeylast); + b2 = asm_cipherlast_be (b2, rkeylast); + b3 = asm_cipherlast_be (b3, rkeylast); + b4 = asm_cipherlast_be (b4, rkeylast); + b5 = asm_cipherlast_be (b5, rkeylast); + b6 = asm_cipherlast_be (b6, rkeylast); + b7 = asm_cipherlast_be (b7, rkeylast); + } + else + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, rkeylast); + b1 = asm_ncipherlast_be (b1, rkeylast); + b2 = asm_ncipherlast_be (b2, rkeylast); + b3 = asm_ncipherlast_be (b3, rkeylast); + b4 = asm_ncipherlast_be (b4, rkeylast); + b5 = asm_ncipherlast_be (b5, rkeylast); + b6 = asm_ncipherlast_be (b6, rkeylast); + b7 = asm_ncipherlast_be (b7, rkeylast); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + VEC_STORE_BE (out, 4, b4, bige_const); + VEC_STORE_BE (out, 5, b5, bige_const); + VEC_STORE_BE (out, 6, b6, bige_const); + VEC_STORE_BE (out, 7, b7, bige_const); + out += 8; + } + + if (nblocks >= 4) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + b0 = 
asm_xor (rkey0, b0); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + + if (encrypt) + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, rkeylast); + b1 = asm_cipherlast_be (b1, rkeylast); + b2 = asm_cipherlast_be (b2, rkeylast); + b3 = asm_cipherlast_be (b3, rkeylast); + } + else + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, rkeylast); + b1 = asm_ncipherlast_be (b1, rkeylast); + b2 = asm_ncipherlast_be (b2, rkeylast); + b3 = asm_ncipherlast_be (b3, rkeylast); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + + if (encrypt) + { + AES_ENCRYPT (b0, rounds); + } + else + { + AES_DECRYPT (b0, rounds); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + + out++; + in++; + } +} + + void CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block rkeylast_orig; block iv, b, bin; block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block rkey; iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; for (; nblocks >= 8; nblocks -= 8) { in0 = iv; in1 = VEC_LOAD_BE_NOSWAP (in, 0); in2 = VEC_LOAD_BE_NOSWAP (in, 1); in3 = VEC_LOAD_BE_NOSWAP (in, 2); in4 = VEC_LOAD_BE_NOSWAP (in, 3); in1 = VEC_BE_SWAP (in1, bige_const); in2 = VEC_BE_SWAP (in2, bige_const); in5 = VEC_LOAD_BE_NOSWAP (in, 4); in6 = VEC_LOAD_BE_NOSWAP (in, 5); in3 = VEC_BE_SWAP (in3, bige_const); in4 = VEC_BE_SWAP (in4, bige_const); in7 = VEC_LOAD_BE_NOSWAP (in, 6); iv = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; in5 = VEC_BE_SWAP (in5, bige_const); in6 = VEC_BE_SWAP (in6, bige_const); b0 = asm_xor (rkey0, in0); b1 = asm_xor (rkey0, in1); in7 = VEC_BE_SWAP (in7, bige_const); iv = VEC_BE_SWAP (iv, bige_const); b2 = asm_xor (rkey0, in2); b3 = asm_xor (rkey0, in3); b4 = asm_xor (rkey0, in4); b5 = asm_xor (rkey0, in5); b6 = asm_xor (rkey0, in6); b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); \ b4 = asm_cipher_be (b4, rkey); \ b5 = asm_cipher_be (b5, 
rkey); \ b6 = asm_cipher_be (b6, rkey); \ b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); in3 = asm_xor (rkeylast, in3); in4 = asm_xor (rkeylast, in4); b0 = asm_cipherlast_be (b0, in1); b1 = asm_cipherlast_be (b1, in2); in5 = asm_xor (rkeylast, in5); in6 = asm_xor (rkeylast, in6); b2 = asm_cipherlast_be (b2, in3); b3 = asm_cipherlast_be (b3, in4); in7 = asm_xor (rkeylast, in7); in0 = asm_xor (rkeylast, iv); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b4 = asm_cipherlast_be (b4, in5); b5 = asm_cipherlast_be (b5, in6); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b6 = asm_cipherlast_be (b6, in7); b7 = asm_cipherlast_be (b7, in0); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { in0 = iv; in1 = VEC_LOAD_BE (in, 0, bige_const); in2 = VEC_LOAD_BE (in, 1, bige_const); in3 = VEC_LOAD_BE (in, 2, bige_const); iv = VEC_LOAD_BE (in, 3, bige_const); b0 = asm_xor (rkey0, in0); b1 = asm_xor (rkey0, in1); b2 = asm_xor (rkey0, in2); b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); in3 = asm_xor (rkeylast, in3); in0 = asm_xor (rkeylast, iv); b0 = asm_cipherlast_be (b0, in1); b1 = asm_cipherlast_be (b1, in2); b2 = asm_cipherlast_be (b2, in3); b3 = asm_cipherlast_be (b3, in0); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { bin = VEC_LOAD_BE (in, 0, bige_const); rkeylast = rkeylast_orig ^ bin; b = iv; iv = bin; AES_ENCRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } VEC_STORE_BE (iv_arg, 0, iv, bige_const); } void CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; byte *out = (byte *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES_ALL; block iv, key0_xor_keylast, nextiv, outiv; unsigned int outadd = -(!cbc_mac) & 16; if (nblocks == 0) /* CMAC may call with nblocks 0. 
*/ return; iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS_ALL (rounds); key0_xor_keylast = rkey0 ^ rkeylast; nextiv = VEC_LOAD_BE (in++, 0, bige_const); iv ^= rkey0 ^ nextiv; do { if (--nblocks) { nextiv = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const); } iv = asm_cipher_be (iv, rkey1); iv = asm_cipher_be (iv, rkey2); iv = asm_cipher_be (iv, rkey3); iv = asm_cipher_be (iv, rkey4); iv = asm_cipher_be (iv, rkey5); iv = asm_cipher_be (iv, rkey6); iv = asm_cipher_be (iv, rkey7); iv = asm_cipher_be (iv, rkey8); iv = asm_cipher_be (iv, rkey9); if (rounds >= 12) { iv = asm_cipher_be (iv, rkey10); iv = asm_cipher_be (iv, rkey11); if (rounds > 12) { iv = asm_cipher_be (iv, rkey12); iv = asm_cipher_be (iv, rkey13); } } outiv = iv; /* Proper order for following instructions is important for best * performance on POWER8: the output path vcipherlast needs to be * last one. */ __asm__ volatile ("vcipherlast %0, %0, %2\n\t" "vcipherlast %1, %1, %3\n\t" : "+v" (iv), "+v" (outiv) : "v" (nextiv), "v" (rkeylast)); VEC_STORE_BE ((u128_t *)out, 0, outiv, bige_const); out += outadd; } while (nblocks); VEC_STORE_BE (iv_arg, 0, outiv, bige_const); } void CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschdec; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block rkeylast_orig; block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block rkey; block iv, b; if (!ctx->decryption_prepared) { internal_aes_ppc_prepare_decryption (ctx); ctx->decryption_prepared = 1; } iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; for (; nblocks >= 8; nblocks -= 8) { in0 = VEC_LOAD_BE_NOSWAP (in, 0); in1 = VEC_LOAD_BE_NOSWAP (in, 1); in2 = VEC_LOAD_BE_NOSWAP (in, 2); in3 = VEC_LOAD_BE_NOSWAP (in, 3); in0 = VEC_BE_SWAP (in0, bige_const); in1 = VEC_BE_SWAP (in1, bige_const); in4 = VEC_LOAD_BE_NOSWAP (in, 4); in5 = VEC_LOAD_BE_NOSWAP (in, 5); in2 = VEC_BE_SWAP (in2, bige_const); in3 = VEC_BE_SWAP (in3, bige_const); in6 = VEC_LOAD_BE_NOSWAP (in, 6); in7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; b0 = asm_xor (rkey0, in0); b1 = asm_xor (rkey0, in1); in4 = VEC_BE_SWAP (in4, bige_const); in5 = VEC_BE_SWAP (in5, bige_const); b2 = asm_xor (rkey0, in2); b3 = asm_xor (rkey0, in3); in6 = VEC_BE_SWAP (in6, bige_const); in7 = VEC_BE_SWAP (in7, bige_const); b4 = asm_xor (rkey0, in4); b5 = asm_xor (rkey0, in5); b6 = asm_xor (rkey0, in6); b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); \ b4 = asm_ncipher_be (b4, rkey); \ b5 = asm_ncipher_be (b5, rkey); \ b6 = asm_ncipher_be (b6, rkey); \ b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND iv = asm_xor (rkeylast, iv); in0 = asm_xor (rkeylast, in0); in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); b0 = asm_ncipherlast_be (b0, iv); iv = in7; b1 = asm_ncipherlast_be (b1, in0); in3 = asm_xor (rkeylast, in3); in4 = asm_xor (rkeylast, in4); b2 = asm_ncipherlast_be (b2, in1); b3 
= asm_ncipherlast_be (b3, in2); in5 = asm_xor (rkeylast, in5); in6 = asm_xor (rkeylast, in6); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b4 = asm_ncipherlast_be (b4, in3); b5 = asm_ncipherlast_be (b5, in4); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b6 = asm_ncipherlast_be (b6, in5); b7 = asm_ncipherlast_be (b7, in6); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { in0 = VEC_LOAD_BE (in, 0, bige_const); in1 = VEC_LOAD_BE (in, 1, bige_const); in2 = VEC_LOAD_BE (in, 2, bige_const); in3 = VEC_LOAD_BE (in, 3, bige_const); b0 = asm_xor (rkey0, in0); b1 = asm_xor (rkey0, in1); b2 = asm_xor (rkey0, in2); b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND iv = asm_xor (rkeylast, iv); in0 = asm_xor (rkeylast, in0); in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); b0 = asm_ncipherlast_be (b0, iv); iv = in3; b1 = asm_ncipherlast_be (b1, in0); b2 = asm_ncipherlast_be (b2, in1); b3 = asm_ncipherlast_be (b3, in2); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { rkeylast = rkeylast_orig ^ iv; iv = VEC_LOAD_BE (in, 0, bige_const); b = iv; AES_DECRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } VEC_STORE_BE (iv_arg, 0, iv, bige_const); } void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { static const unsigned char vec_one_const[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block rkeylast_orig; block ctr, b, one; ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; if (nblocks >= 4) { block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block two, three, four; block rkey; two = asm_add_uint128 (one, one); three = asm_add_uint128 (two, one); four = asm_add_uint128 (two, two); for (; nblocks >= 8; nblocks -= 8) { b1 = asm_add_uint128 (ctr, one); b2 = asm_add_uint128 (ctr, two); b3 = asm_add_uint128 (ctr, three); b4 = asm_add_uint128 (ctr, four); b5 = asm_add_uint128 (b1, four); b6 = asm_add_uint128 (b2, four); b7 = asm_add_uint128 (b3, four); b0 = asm_xor (rkey0, ctr); rkey = ALIGNED_LOAD (rk, 1); ctr = asm_add_uint128 (b4, four); b1 = asm_xor (rkey0, b1); b2 = asm_xor (rkey0, b2); b3 = asm_xor (rkey0, b3); b0 = asm_cipher_be 
(b0, rkey); b1 = asm_cipher_be (b1, rkey); b2 = asm_cipher_be (b2, rkey); b3 = asm_cipher_be (b3, rkey); b4 = asm_xor (rkey0, b4); b5 = asm_xor (rkey0, b5); b6 = asm_xor (rkey0, b6); b7 = asm_xor (rkey0, b7); b4 = asm_cipher_be (b4, rkey); b5 = asm_cipher_be (b5, rkey); b6 = asm_cipher_be (b6, rkey); b7 = asm_cipher_be (b7, rkey); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); \ b4 = asm_cipher_be (b4, rkey); \ b5 = asm_cipher_be (b5, rkey); \ b6 = asm_cipher_be (b6, rkey); \ b7 = asm_cipher_be (b7, rkey); in0 = VEC_LOAD_BE_NOSWAP (in, 0); DO_ROUND(2); in1 = VEC_LOAD_BE_NOSWAP (in, 1); DO_ROUND(3); in2 = VEC_LOAD_BE_NOSWAP (in, 2); DO_ROUND(4); in3 = VEC_LOAD_BE_NOSWAP (in, 3); DO_ROUND(5); in4 = VEC_LOAD_BE_NOSWAP (in, 4); DO_ROUND(6); in5 = VEC_LOAD_BE_NOSWAP (in, 5); DO_ROUND(7); in6 = VEC_LOAD_BE_NOSWAP (in, 6); DO_ROUND(8); in7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND in0 = VEC_BE_SWAP (in0, bige_const); in1 = VEC_BE_SWAP (in1, bige_const); in2 = VEC_BE_SWAP (in2, bige_const); in3 = VEC_BE_SWAP (in3, bige_const); in4 = VEC_BE_SWAP (in4, bige_const); in5 = VEC_BE_SWAP (in5, bige_const); in6 = VEC_BE_SWAP (in6, bige_const); in7 = VEC_BE_SWAP (in7, bige_const); in0 = asm_xor (rkeylast, in0); in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); in3 = asm_xor (rkeylast, in3); b0 = asm_cipherlast_be (b0, in0); b1 = asm_cipherlast_be (b1, in1); in4 = asm_xor (rkeylast, in4); in5 = asm_xor (rkeylast, in5); b2 = asm_cipherlast_be (b2, in2); b3 = asm_cipherlast_be (b3, in3); in6 = asm_xor (rkeylast, in6); in7 = asm_xor (rkeylast, in7); b4 = asm_cipherlast_be (b4, in4); b5 = asm_cipherlast_be (b5, in5); b6 = asm_cipherlast_be (b6, in6); b7 = asm_cipherlast_be (b7, in7); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { b1 = asm_add_uint128 (ctr, one); b2 = asm_add_uint128 (ctr, two); b3 = asm_add_uint128 (ctr, three); b0 = asm_xor (rkey0, ctr); ctr = asm_add_uint128 (ctr, four); b1 = asm_xor (rkey0, b1); b2 = asm_xor (rkey0, b2); b3 = asm_xor (rkey0, b3); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); in0 = VEC_LOAD_BE (in, 0, bige_const); in1 = VEC_LOAD_BE (in, 1, bige_const); in2 = VEC_LOAD_BE (in, 2, bige_const); in3 = VEC_LOAD_BE (in, 3, bige_const); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND in0 = asm_xor (rkeylast, in0); in1 = asm_xor (rkeylast, in1); in2 = asm_xor (rkeylast, in2); in3 = asm_xor (rkeylast, in3); b0 = asm_cipherlast_be (b0, in0); b1 = asm_cipherlast_be (b1, in1); b2 
= asm_cipherlast_be (b2, in2); b3 = asm_cipherlast_be (b3, in3); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } } for (; nblocks; nblocks--) { b = ctr; ctr = asm_add_uint128 (ctr, one); rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); AES_ENCRYPT (b, rounds); VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); } size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; u64 data_nblocks = c->u_mode.ocb.data_nblocks; block l0, l1, l2, l; block b0, b1, b2, b3, b4, b5, b6, b7, b; block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; block rkey, rkeylf; block ctr, iv; ROUND_KEY_VARIABLES; iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); if (encrypt) { const u128_t *rk = (u128_t *)&ctx->keyschenc; PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ b ^= iv; AES_ENCRYPT (b, rounds); b ^= iv; VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } for (; nblocks >= 8; nblocks -= 8) { b0 = VEC_LOAD_BE_NOSWAP (in, 0); b1 = VEC_LOAD_BE_NOSWAP (in, 1); b2 = VEC_LOAD_BE_NOSWAP (in, 2); b3 = VEC_LOAD_BE_NOSWAP (in, 3); b4 = VEC_LOAD_BE_NOSWAP (in, 4); b5 = VEC_LOAD_BE_NOSWAP (in, 5); b6 = VEC_LOAD_BE_NOSWAP (in, 6); b7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); b0 = VEC_BE_SWAP(b0, bige_const); b1 = VEC_BE_SWAP(b1, bige_const); b2 = VEC_BE_SWAP(b2, bige_const); b3 = VEC_BE_SWAP(b3, bige_const); b4 = VEC_BE_SWAP(b4, bige_const); b5 = VEC_BE_SWAP(b5, bige_const); b6 = VEC_BE_SWAP(b6, bige_const); b7 = VEC_BE_SWAP(b7, bige_const); l = VEC_BE_SWAP(l, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; iv ^= rkey0; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ l1; iv3 = iv ^ l1 ^ l2; iv4 = iv ^ l1 ^ l2 ^ l0; iv5 = iv ^ l2 ^ l0; iv6 = iv ^ l2; iv7 = iv ^ l2 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; b4 ^= iv4; b5 ^= iv5; b6 ^= iv6; b7 ^= iv7; iv = iv7 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); \ b4 = asm_cipher_be (b4, rkey); \ b5 = asm_cipher_be (b5, rkey); \ b6 = asm_cipher_be (b6, rkey); \ b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); rkeylf = asm_xor (rkeylast, rkey0); DO_ROUND(8); iv0 = asm_xor (rkeylf, iv0); iv1 = asm_xor (rkeylf, iv1); iv2 = asm_xor (rkeylf, iv2); iv3 = asm_xor (rkeylf, iv3); iv4 = asm_xor (rkeylf, iv4); iv5 = asm_xor (rkeylf, iv5); iv6 = asm_xor (rkeylf, iv6); iv7 = asm_xor (rkeylf, iv7); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds 
> 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND b0 = asm_cipherlast_be (b0, iv0); b1 = asm_cipherlast_be (b1, iv1); b2 = asm_cipherlast_be (b2, iv2); b3 = asm_cipherlast_be (b3, iv3); b4 = asm_cipherlast_be (b4, iv4); b5 = asm_cipherlast_be (b5, iv5); b6 = asm_cipherlast_be (b6, iv6); b7 = asm_cipherlast_be (b7, iv7); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { b0 = VEC_LOAD_BE (in, 0, bige_const); b1 = VEC_LOAD_BE (in, 1, bige_const); b2 = VEC_LOAD_BE (in, 2, bige_const); b3 = VEC_LOAD_BE (in, 3, bige_const); l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; iv ^= rkey0; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ l1; iv3 = iv ^ l1 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; iv = iv3 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast ^ rkey0; b0 = asm_cipherlast_be (b0, rkey ^ iv0); b1 = asm_cipherlast_be (b1, rkey ^ iv1); b2 = asm_cipherlast_be (b2, rkey ^ iv2); b3 = asm_cipherlast_be (b3, rkey ^ iv3); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ b ^= iv; AES_ENCRYPT (b, rounds); b ^= iv; VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } } else { const u128_t *rk = (u128_t *)&ctx->keyschdec; if (!ctx->decryption_prepared) { internal_aes_ppc_prepare_decryption (ctx); ctx->decryption_prepared = 1; } PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ b ^= iv; AES_DECRYPT (b, rounds); b ^= iv; /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } for (; nblocks >= 8; nblocks -= 8) { b0 = VEC_LOAD_BE_NOSWAP (in, 0); b1 = VEC_LOAD_BE_NOSWAP (in, 1); b2 = VEC_LOAD_BE_NOSWAP (in, 2); b3 = VEC_LOAD_BE_NOSWAP (in, 3); b4 = VEC_LOAD_BE_NOSWAP (in, 4); b5 = VEC_LOAD_BE_NOSWAP (in, 5); b6 = VEC_LOAD_BE_NOSWAP (in, 6); b7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); b0 = VEC_BE_SWAP(b0, bige_const); b1 = VEC_BE_SWAP(b1, 
bige_const); b2 = VEC_BE_SWAP(b2, bige_const); b3 = VEC_BE_SWAP(b3, bige_const); b4 = VEC_BE_SWAP(b4, bige_const); b5 = VEC_BE_SWAP(b5, bige_const); b6 = VEC_BE_SWAP(b6, bige_const); b7 = VEC_BE_SWAP(b7, bige_const); l = VEC_BE_SWAP(l, bige_const); iv ^= rkey0; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ l1; iv3 = iv ^ l1 ^ l2; iv4 = iv ^ l1 ^ l2 ^ l0; iv5 = iv ^ l2 ^ l0; iv6 = iv ^ l2; iv7 = iv ^ l2 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; b4 ^= iv4; b5 ^= iv5; b6 ^= iv6; b7 ^= iv7; iv = iv7 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); \ b4 = asm_ncipher_be (b4, rkey); \ b5 = asm_ncipher_be (b5, rkey); \ b6 = asm_ncipher_be (b6, rkey); \ b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); rkeylf = asm_xor (rkeylast, rkey0); DO_ROUND(8); iv0 = asm_xor (rkeylf, iv0); iv1 = asm_xor (rkeylf, iv1); iv2 = asm_xor (rkeylf, iv2); iv3 = asm_xor (rkeylf, iv3); iv4 = asm_xor (rkeylf, iv4); iv5 = asm_xor (rkeylf, iv5); iv6 = asm_xor (rkeylf, iv6); iv7 = asm_xor (rkeylf, iv7); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND b0 = asm_ncipherlast_be (b0, iv0); b1 = asm_ncipherlast_be (b1, iv1); b2 = asm_ncipherlast_be (b2, iv2); b3 = asm_ncipherlast_be (b3, iv3); b4 = asm_ncipherlast_be (b4, iv4); b5 = asm_ncipherlast_be (b5, iv5); b6 = asm_ncipherlast_be (b6, iv6); b7 = asm_ncipherlast_be (b7, iv7); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { b0 = VEC_LOAD_BE (in, 0, bige_const); b1 = VEC_LOAD_BE (in, 1, bige_const); b2 = VEC_LOAD_BE (in, 2, bige_const); b3 = VEC_LOAD_BE (in, 3, bige_const); l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); iv ^= rkey0; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ l1; iv3 = iv ^ l1 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; iv = iv3 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast ^ rkey0; b0 = asm_ncipherlast_be (b0, rkey ^ iv0); b1 = asm_ncipherlast_be (b1, rkey ^ iv1); b2 = asm_ncipherlast_be (b2, rkey ^ iv2); b3 = asm_ncipherlast_be (b3, rkey ^ iv3); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, 
bige_const); b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ b ^= iv; AES_DECRYPT (b, rounds); b ^= iv; /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } } VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); c->u_mode.ocb.data_nblocks = data_nblocks; return 0; } size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) { const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *abuf = (const u128_t *)abuf_arg; int rounds = ctx->rounds; u64 data_nblocks = c->u_mode.ocb.aad_nblocks; block l0, l1, l2, l; block b0, b1, b2, b3, b4, b5, b6, b7, b; block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; block rkey, frkey; block ctr, iv; ROUND_KEY_VARIABLES; iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ b ^= iv; AES_ENCRYPT (b, rounds); ctr ^= b; abuf += 1; } for (; nblocks >= 8; nblocks -= 8) { b0 = VEC_LOAD_BE (abuf, 0, bige_const); b1 = VEC_LOAD_BE (abuf, 1, bige_const); b2 = VEC_LOAD_BE (abuf, 2, bige_const); b3 = VEC_LOAD_BE (abuf, 3, bige_const); b4 = VEC_LOAD_BE (abuf, 4, bige_const); b5 = VEC_LOAD_BE (abuf, 5, bige_const); b6 = VEC_LOAD_BE (abuf, 6, bige_const); b7 = VEC_LOAD_BE (abuf, 7, bige_const); l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); frkey = rkey0; iv ^= frkey; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ l1; iv3 = iv ^ l1 ^ l2; iv4 = iv ^ l1 ^ l2 ^ l0; iv5 = iv ^ l2 ^ l0; iv6 = iv ^ l2; iv7 = iv ^ l2 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; b4 ^= iv4; b5 ^= iv5; b6 ^= iv6; b7 ^= iv7; iv = iv7 ^ frkey; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); \ b4 = asm_cipher_be (b4, rkey); \ b5 = asm_cipher_be (b5, rkey); \ b6 = asm_cipher_be (b6, rkey); \ b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast; b0 = asm_cipherlast_be (b0, rkey); b1 = asm_cipherlast_be (b1, rkey); b2 = asm_cipherlast_be (b2, rkey); b3 = asm_cipherlast_be (b3, rkey); b4 = asm_cipherlast_be (b4, rkey); b5 = asm_cipherlast_be (b5, rkey); b6 = asm_cipherlast_be (b6, rkey); b7 = asm_cipherlast_be (b7, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; abuf += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { b0 = VEC_LOAD_BE (abuf, 0, bige_const); b1 = VEC_LOAD_BE (abuf, 1, bige_const); b2 = VEC_LOAD_BE (abuf, 2, bige_const); b3 = VEC_LOAD_BE (abuf, 3, bige_const); l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); frkey = rkey0; iv ^= frkey; iv0 = iv ^ l0; iv1 = iv ^ l0 ^ l1; iv2 = iv ^ 
l1; iv3 = iv ^ l1 ^ l; b0 ^= iv0; b1 ^= iv1; b2 ^= iv2; b3 ^= iv3; iv = iv3 ^ frkey; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast; b0 = asm_cipherlast_be (b0, rkey); b1 = asm_cipherlast_be (b1, rkey); b2 = asm_cipherlast_be (b2, rkey); b3 = asm_cipherlast_be (b3, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3; abuf += 4; nblocks -= 4; } for (; nblocks; nblocks--) { l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ b ^= iv; AES_ENCRYPT (b, rounds); ctr ^= b; abuf += 1; } VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); c->u_mode.ocb.aad_nblocks = data_nblocks; return 0; } void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { #ifdef WORDS_BIGENDIAN static const block vec_bswap128_const = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; #else static const block vec_bswap128_const = { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; #endif static const unsigned char vec_tweak_const[16] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; static const vector unsigned long long vec_shift63_const = { 63, 63 }; const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; block tweak; block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; block tweak_const, bswap128_const, shift63_const; ROUND_KEY_VARIABLES; tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0); tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); tweak = asm_vperm1 (tweak, bswap128_const); #define GEN_TWEAK(tout, tin) /* Generate next tweak. 
*/ \ do { \ block tmp1, tmp2; \ tmp1 = asm_swap_uint64_halfs(tin); \ tmp2 = asm_add_uint64(tin, tin); \ tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \ tout = asm_xor(tmp1, tmp2); \ } while (0) if (encrypt) { const u128_t *rk = (u128_t *)&ctx->keyschenc; PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8; nblocks -= 8) { b0 = VEC_LOAD_BE_NOSWAP (in, 0); b1 = VEC_LOAD_BE_NOSWAP (in, 1); b2 = VEC_LOAD_BE_NOSWAP (in, 2); b3 = VEC_LOAD_BE_NOSWAP (in, 3); tweak0 = tweak; GEN_TWEAK (tweak1, tweak0); tweak0 = asm_vperm1 (tweak0, bswap128_const); b4 = VEC_LOAD_BE_NOSWAP (in, 4); b5 = VEC_LOAD_BE_NOSWAP (in, 5); GEN_TWEAK (tweak2, tweak1); tweak1 = asm_vperm1 (tweak1, bswap128_const); b6 = VEC_LOAD_BE_NOSWAP (in, 6); b7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; b0 = VEC_BE_SWAP(b0, bige_const); b1 = VEC_BE_SWAP(b1, bige_const); GEN_TWEAK (tweak3, tweak2); tweak2 = asm_vperm1 (tweak2, bswap128_const); GEN_TWEAK (tweak4, tweak3); tweak3 = asm_vperm1 (tweak3, bswap128_const); b2 = VEC_BE_SWAP(b2, bige_const); b3 = VEC_BE_SWAP(b3, bige_const); GEN_TWEAK (tweak5, tweak4); tweak4 = asm_vperm1 (tweak4, bswap128_const); GEN_TWEAK (tweak6, tweak5); tweak5 = asm_vperm1 (tweak5, bswap128_const); b4 = VEC_BE_SWAP(b4, bige_const); b5 = VEC_BE_SWAP(b5, bige_const); GEN_TWEAK (tweak7, tweak6); tweak6 = asm_vperm1 (tweak6, bswap128_const); GEN_TWEAK (tweak, tweak7); tweak7 = asm_vperm1 (tweak7, bswap128_const); b6 = VEC_BE_SWAP(b6, bige_const); b7 = VEC_BE_SWAP(b7, bige_const); tweak0 = asm_xor (tweak0, rkey0); tweak1 = asm_xor (tweak1, rkey0); tweak2 = asm_xor (tweak2, rkey0); tweak3 = asm_xor (tweak3, rkey0); tweak4 = asm_xor (tweak4, rkey0); tweak5 = asm_xor (tweak5, rkey0); tweak6 = asm_xor (tweak6, rkey0); tweak7 = asm_xor (tweak7, rkey0); b0 = asm_xor (b0, tweak0); b1 = asm_xor (b1, tweak1); b2 = asm_xor (b2, tweak2); b3 = asm_xor (b3, tweak3); b4 = asm_xor (b4, tweak4); b5 = asm_xor (b5, tweak5); b6 = asm_xor (b6, tweak6); b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); \ b4 = asm_cipher_be (b4, rkey); \ b5 = asm_cipher_be (b5, rkey); \ b6 = asm_cipher_be (b6, rkey); \ b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); rkeylf = asm_xor (rkeylast, rkey0); DO_ROUND(8); tweak0 = asm_xor (tweak0, rkeylf); tweak1 = asm_xor (tweak1, rkeylf); tweak2 = asm_xor (tweak2, rkeylf); tweak3 = asm_xor (tweak3, rkeylf); tweak4 = asm_xor (tweak4, rkeylf); tweak5 = asm_xor (tweak5, rkeylf); tweak6 = asm_xor (tweak6, rkeylf); tweak7 = asm_xor (tweak7, rkeylf); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND b0 = asm_cipherlast_be (b0, tweak0); b1 = asm_cipherlast_be (b1, tweak1); b2 = asm_cipherlast_be (b2, tweak2); b3 = asm_cipherlast_be (b3, tweak3); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b4 = asm_cipherlast_be (b4, tweak4); b5 = asm_cipherlast_be (b5, tweak5); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b6 = asm_cipherlast_be (b6, tweak6); b7 = asm_cipherlast_be (b7, tweak7); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); 
VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { tweak0 = tweak; GEN_TWEAK (tweak1, tweak0); GEN_TWEAK (tweak2, tweak1); GEN_TWEAK (tweak3, tweak2); GEN_TWEAK (tweak, tweak3); b0 = VEC_LOAD_BE (in, 0, bige_const); b1 = VEC_LOAD_BE (in, 1, bige_const); b2 = VEC_LOAD_BE (in, 2, bige_const); b3 = VEC_LOAD_BE (in, 3, bige_const); tweak0 = asm_vperm1 (tweak0, bswap128_const); tweak1 = asm_vperm1 (tweak1, bswap128_const); tweak2 = asm_vperm1 (tweak2, bswap128_const); tweak3 = asm_vperm1 (tweak3, bswap128_const); b0 ^= tweak0 ^ rkey0; b1 ^= tweak1 ^ rkey0; b2 ^= tweak2 ^ rkey0; b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_cipher_be (b0, rkey); \ b1 = asm_cipher_be (b1, rkey); \ b2 = asm_cipher_be (b2, rkey); \ b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast; b0 = asm_cipherlast_be (b0, rkey ^ tweak0); b1 = asm_cipherlast_be (b1, rkey ^ tweak1); b2 = asm_cipherlast_be (b2, rkey ^ tweak2); b3 = asm_cipherlast_be (b3, rkey ^ tweak3); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. */ GEN_TWEAK (tweak, tweak); AES_ENCRYPT (b, rounds); b ^= tweak0; VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } } else { const u128_t *rk = (u128_t *)&ctx->keyschdec; if (!ctx->decryption_prepared) { internal_aes_ppc_prepare_decryption (ctx); ctx->decryption_prepared = 1; } PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8; nblocks -= 8) { b0 = VEC_LOAD_BE_NOSWAP (in, 0); b1 = VEC_LOAD_BE_NOSWAP (in, 1); b2 = VEC_LOAD_BE_NOSWAP (in, 2); b3 = VEC_LOAD_BE_NOSWAP (in, 3); tweak0 = tweak; GEN_TWEAK (tweak1, tweak0); tweak0 = asm_vperm1 (tweak0, bswap128_const); b4 = VEC_LOAD_BE_NOSWAP (in, 4); b5 = VEC_LOAD_BE_NOSWAP (in, 5); GEN_TWEAK (tweak2, tweak1); tweak1 = asm_vperm1 (tweak1, bswap128_const); b6 = VEC_LOAD_BE_NOSWAP (in, 6); b7 = VEC_LOAD_BE_NOSWAP (in, 7); in += 8; b0 = VEC_BE_SWAP(b0, bige_const); b1 = VEC_BE_SWAP(b1, bige_const); GEN_TWEAK (tweak3, tweak2); tweak2 = asm_vperm1 (tweak2, bswap128_const); GEN_TWEAK (tweak4, tweak3); tweak3 = asm_vperm1 (tweak3, bswap128_const); b2 = VEC_BE_SWAP(b2, bige_const); b3 = VEC_BE_SWAP(b3, bige_const); GEN_TWEAK (tweak5, tweak4); tweak4 = asm_vperm1 (tweak4, bswap128_const); GEN_TWEAK (tweak6, tweak5); tweak5 = asm_vperm1 (tweak5, bswap128_const); b4 = VEC_BE_SWAP(b4, bige_const); b5 = VEC_BE_SWAP(b5, bige_const); GEN_TWEAK (tweak7, tweak6); tweak6 = asm_vperm1 (tweak6, bswap128_const); GEN_TWEAK (tweak, tweak7); tweak7 = asm_vperm1 (tweak7, bswap128_const); b6 = VEC_BE_SWAP(b6, bige_const); b7 = VEC_BE_SWAP(b7, bige_const); tweak0 = asm_xor (tweak0, rkey0); tweak1 = asm_xor (tweak1, rkey0); tweak2 = asm_xor (tweak2, rkey0); tweak3 = asm_xor (tweak3, rkey0); tweak4 = asm_xor (tweak4, rkey0); tweak5 = asm_xor (tweak5, rkey0); tweak6 = asm_xor (tweak6, rkey0); tweak7 = asm_xor (tweak7, rkey0); b0 = asm_xor (b0, tweak0); b1 = asm_xor (b1, tweak1); b2 = asm_xor (b2, tweak2); 
b3 = asm_xor (b3, tweak3); b4 = asm_xor (b4, tweak4); b5 = asm_xor (b5, tweak5); b6 = asm_xor (b6, tweak6); b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); \ b4 = asm_ncipher_be (b4, rkey); \ b5 = asm_ncipher_be (b5, rkey); \ b6 = asm_ncipher_be (b6, rkey); \ b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); rkeylf = asm_xor (rkeylast, rkey0); DO_ROUND(8); tweak0 = asm_xor (tweak0, rkeylf); tweak1 = asm_xor (tweak1, rkeylf); tweak2 = asm_xor (tweak2, rkeylf); tweak3 = asm_xor (tweak3, rkeylf); tweak4 = asm_xor (tweak4, rkeylf); tweak5 = asm_xor (tweak5, rkeylf); tweak6 = asm_xor (tweak6, rkeylf); tweak7 = asm_xor (tweak7, rkeylf); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND b0 = asm_ncipherlast_be (b0, tweak0); b1 = asm_ncipherlast_be (b1, tweak1); b2 = asm_ncipherlast_be (b2, tweak2); b3 = asm_ncipherlast_be (b3, tweak3); b0 = VEC_BE_SWAP (b0, bige_const); b1 = VEC_BE_SWAP (b1, bige_const); b4 = asm_ncipherlast_be (b4, tweak4); b5 = asm_ncipherlast_be (b5, tweak5); b2 = VEC_BE_SWAP (b2, bige_const); b3 = VEC_BE_SWAP (b3, bige_const); b6 = asm_ncipherlast_be (b6, tweak6); b7 = asm_ncipherlast_be (b7, tweak7); VEC_STORE_BE_NOSWAP (out, 0, b0); VEC_STORE_BE_NOSWAP (out, 1, b1); b4 = VEC_BE_SWAP (b4, bige_const); b5 = VEC_BE_SWAP (b5, bige_const); VEC_STORE_BE_NOSWAP (out, 2, b2); VEC_STORE_BE_NOSWAP (out, 3, b3); b6 = VEC_BE_SWAP (b6, bige_const); b7 = VEC_BE_SWAP (b7, bige_const); VEC_STORE_BE_NOSWAP (out, 4, b4); VEC_STORE_BE_NOSWAP (out, 5, b5); VEC_STORE_BE_NOSWAP (out, 6, b6); VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { tweak0 = tweak; GEN_TWEAK (tweak1, tweak0); GEN_TWEAK (tweak2, tweak1); GEN_TWEAK (tweak3, tweak2); GEN_TWEAK (tweak, tweak3); b0 = VEC_LOAD_BE (in, 0, bige_const); b1 = VEC_LOAD_BE (in, 1, bige_const); b2 = VEC_LOAD_BE (in, 2, bige_const); b3 = VEC_LOAD_BE (in, 3, bige_const); tweak0 = asm_vperm1 (tweak0, bswap128_const); tweak1 = asm_vperm1 (tweak1, bswap128_const); tweak2 = asm_vperm1 (tweak2, bswap128_const); tweak3 = asm_vperm1 (tweak3, bswap128_const); b0 ^= tweak0 ^ rkey0; b1 ^= tweak1 ^ rkey0; b2 ^= tweak2 ^ rkey0; b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ rkey = ALIGNED_LOAD (rk, r); \ b0 = asm_ncipher_be (b0, rkey); \ b1 = asm_ncipher_be (b1, rkey); \ b2 = asm_ncipher_be (b2, rkey); \ b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); DO_ROUND(3); DO_ROUND(4); DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); DO_ROUND(9); if (rounds >= 12) { DO_ROUND(10); DO_ROUND(11); if (rounds > 12) { DO_ROUND(12); DO_ROUND(13); } } #undef DO_ROUND rkey = rkeylast; b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); b3 = asm_ncipherlast_be (b3, rkey ^ tweak3); VEC_STORE_BE (out, 0, b0, bige_const); VEC_STORE_BE (out, 1, b1, bige_const); VEC_STORE_BE (out, 2, b2, bige_const); VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; nblocks -= 4; } for (; nblocks; nblocks--) { tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. 
*/ GEN_TWEAK (tweak, tweak); AES_DECRYPT (b, rounds); b ^= tweak0; VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } } tweak = asm_vperm1 (tweak, bswap128_const); VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); #undef GEN_TWEAK } diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 19f6a7e1..53c4f126 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -1,203 +1,204 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden * Copyright (C) 2019-2020, 2022 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, * and Cryptogams by Andy Polyakov, and if made part of a release of either * or both projects, is thereafter dual-licensed under the license said project * is released under. */ #include #include "rijndael-internal.h" #include "cipher-internal.h" #include "bufhelp.h" #ifdef USE_PPC_CRYPTO #include "rijndael-ppc-common.h" #ifndef WORDS_BIGENDIAN static const block vec_bswap32_const_neg = { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; #endif static ASM_FUNC_ATTR_INLINE block asm_load_be_const(void) { #ifndef WORDS_BIGENDIAN return ALIGNED_LOAD (&vec_bswap32_const_neg, 0); #else static const block vec_dummy = { 0 }; return vec_dummy; #endif } static ASM_FUNC_ATTR_INLINE block asm_be_swap(block vec, block be_bswap_const) { (void)be_bswap_const; #ifndef WORDS_BIGENDIAN return asm_vperm1 (vec, be_bswap_const); #else return vec; #endif } static ASM_FUNC_ATTR_INLINE block asm_load_be_noswap(unsigned long offset, const void *ptr) { block vec; #if __GNUC__ >= 4 if (__builtin_constant_p (offset) && offset == 0) __asm__ volatile ("lxvw4x %x0,0,%1\n\t" : "=wa" (vec) : "r" ((uintptr_t)ptr) : "memory"); else #endif __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" : "=wa" (vec) : "r" (offset), "r" ((uintptr_t)ptr) : "memory", "r0"); /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ return vec; } static ASM_FUNC_ATTR_INLINE void asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ #if __GNUC__ >= 4 if (__builtin_constant_p (offset) && offset == 0) __asm__ volatile ("stxvw4x %x0,0,%1\n\t" : : "wa" (vec), "r" ((uintptr_t)ptr) : "memory"); else #endif __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" : : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) : "memory", "r0"); } static ASM_FUNC_ATTR_INLINE u32 _gcry_aes_sbox4_ppc8(u32 fourbytes) { vec_u32 vec_fourbyte = { fourbytes, fourbytes, fourbytes, fourbytes }; #ifdef WORDS_BIGENDIAN return ((vec_u32)asm_sbox_be((block)vec_fourbyte))[1]; #else return ((vec_u32)asm_sbox_be((block)vec_fourbyte))[2]; #endif } static ASM_FUNC_ATTR_INLINE unsigned int keysched_idx(unsigned int in) { #ifdef WORDS_BIGENDIAN return in; #else return (in & ~3U) | (3U - (in & 3U)); #endif } void _gcry_aes_ppc8_setkey 
(RIJNDAEL_context *ctx, const byte *key) { u32 tk_u32[MAXKC]; unsigned int rounds = ctx->rounds; unsigned int KC = rounds - 6; u32 *W_u32 = ctx->keyschenc32b; unsigned int i, j; u32 tk_prev; byte rcon = 1; for (i = 0; i < KC; i += 2) { unsigned int idx0 = keysched_idx(i + 0); unsigned int idx1 = keysched_idx(i + 1); tk_u32[i + 0] = buf_get_le32(key + i * 4 + 0); tk_u32[i + 1] = buf_get_le32(key + i * 4 + 4); W_u32[idx0] = _gcry_bswap32(tk_u32[i + 0]); W_u32[idx1] = _gcry_bswap32(tk_u32[i + 1]); } for (i = KC, j = KC, tk_prev = tk_u32[KC - 1]; i < 4 * (rounds + 1); i += 2, j += 2) { unsigned int idx0 = keysched_idx(i + 0); unsigned int idx1 = keysched_idx(i + 1); u32 temp0 = tk_prev; u32 temp1; if (j == KC) { j = 0; temp0 = _gcry_aes_sbox4_ppc8(rol(temp0, 24)) ^ rcon; rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } else if (KC == 8 && j == 4) { temp0 = _gcry_aes_sbox4_ppc8(temp0); } temp1 = tk_u32[j + 0]; tk_u32[j + 0] = temp0 ^ temp1; tk_u32[j + 1] ^= temp0 ^ temp1; tk_prev = tk_u32[j + 1]; W_u32[idx0] = _gcry_bswap32(tk_u32[j + 0]); W_u32[idx1] = _gcry_bswap32(tk_u32[j + 1]); } wipememory(tk_u32, sizeof(tk_u32)); } void _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { internal_aes_ppc_prepare_decryption (ctx); } #define GCRY_AES_PPC8 1 #define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt #define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt +#define ECB_CRYPT_FUNC _gcry_aes_ppc8_ecb_crypt #define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc #define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec #define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc #define CBC_DEC_FUNC _gcry_aes_ppc8_cbc_dec #define CTR_ENC_FUNC _gcry_aes_ppc8_ctr_enc #define OCB_CRYPT_FUNC _gcry_aes_ppc8_ocb_crypt #define OCB_AUTH_FUNC _gcry_aes_ppc8_ocb_auth #define XTS_CRYPT_FUNC _gcry_aes_ppc8_xts_crypt #include #endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c index facdedd4..9ce9c224 100644 --- a/cipher/rijndael-ppc9le.c +++ b/cipher/rijndael-ppc9le.c @@ -1,102 +1,103 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden * Copyright (C) 2019-2020 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, * and Cryptogams by Andy Polyakov, and if made part of a release of either * or both projects, is thereafter dual-licensed under the license said project * is released under. 
*/ #include #include "rijndael-internal.h" #include "cipher-internal.h" #include "bufhelp.h" #ifdef USE_PPC_CRYPTO_WITH_PPC9LE #include "rijndael-ppc-common.h" static ASM_FUNC_ATTR_INLINE block asm_load_be_const(void) { static const block vec_dummy = { 0 }; return vec_dummy; } static ASM_FUNC_ATTR_INLINE block asm_be_swap(block vec, block be_bswap_const) { (void)be_bswap_const; return vec; } static ASM_FUNC_ATTR_INLINE block asm_load_be_noswap(unsigned long offset, const void *ptr) { block vec; #if __GNUC__ >= 4 if (__builtin_constant_p (offset) && offset == 0) __asm__ volatile ("lxvb16x %x0,0,%1\n\t" : "=wa" (vec) : "r" ((uintptr_t)ptr) : "memory"); else #endif __asm__ volatile ("lxvb16x %x0,%1,%2\n\t" : "=wa" (vec) : "r" (offset), "r" ((uintptr_t)ptr) : "memory", "r0"); return vec; } static ASM_FUNC_ATTR_INLINE void asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { #if __GNUC__ >= 4 if (__builtin_constant_p (offset) && offset == 0) __asm__ volatile ("stxvb16x %x0,0,%1\n\t" : : "wa" (vec), "r" ((uintptr_t)ptr) : "memory"); else #endif __asm__ volatile ("stxvb16x %x0,%1,%2\n\t" : : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) : "memory", "r0"); } #define GCRY_AES_PPC9LE 1 #define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt #define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt +#define ECB_CRYPT_FUNC _gcry_aes_ppc9le_ecb_crypt #define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc #define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec #define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc #define CBC_DEC_FUNC _gcry_aes_ppc9le_cbc_dec #define CTR_ENC_FUNC _gcry_aes_ppc9le_ctr_enc #define OCB_CRYPT_FUNC _gcry_aes_ppc9le_ocb_crypt #define OCB_AUTH_FUNC _gcry_aes_ppc9le_ocb_auth #define XTS_CRYPT_FUNC _gcry_aes_ppc9le_xts_crypt #include #endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 84cb7109..071d4a16 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,1996 +1,2006 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . ******************************************************************* * The code here is based on the optimized implementation taken from * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, * which carries this notice: *------------------------------------------ * rijndael-alg-fst.c v2.3 April '2000 * * Optimised ANSI C code * * authors: v1.0: Antoon Bosselaers * v2.0: Vincent Rijmen * v2.3: Paulo Barreto * * This code is placed in the public domain. 
*------------------------------------------ * * The SP800-38a document is available at: * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf * */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AMD64_ASM /* AMD64 assembly implementations of AES */ extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_AMD64_ASM*/ #ifdef USE_AESNI /* AES-NI (AMD64 & i386) accelerated implementations of AES */ extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_aesni_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif #ifdef USE_VAES /* VAES (AMD64) accelerated implementation of AES */ extern void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_vaes_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif 
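/* Illustrative sketch, not part of this patch: every ecb_crypt bulk
 * implementation declared above (AES-NI, VAES, and the PPC variants added
 * here) shares the same signature, so a caller only needs the bulk_ops
 * table filled in at setkey time.  The function and variable names below
 * are hypothetical placeholders, guarded out so they are never compiled. */
#if 0
static void
example_ecb_bulk (cipher_bulk_ops_t *bulk_ops, void *cipher_ctx,
                  unsigned char *out, const unsigned char *in,
                  size_t nblocks, int encrypt)
{
  if (bulk_ops->ecb_crypt)
    /* Accelerated path: processes NBLOCKS 16-byte blocks in one call. */
    bulk_ops->ecb_crypt (cipher_ctx, out, in, nblocks, encrypt);
  /* Otherwise the generic mode code in cipher.c falls back to the
   * per-block encrypt_fn/decrypt_fn. */
}
#endif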
#ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern void _gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx); #endif #ifdef USE_ARM_ASM /* ARM assembly implementations of AES */ extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_ARM_ASM*/ #ifdef USE_ARM_CE /* ARMv8 Crypto Extension implementations of AES */ extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_armv8_ce_ocb_auth 
(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_armv8_ce_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ #ifdef USE_PPC_CRYPTO /* PowerPC Crypto implementations of AES */ extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); +extern void _gcry_aes_ppc8_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); + extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif /*USE_PPC_CRYPTO*/ #ifdef USE_PPC_CRYPTO_WITH_PPC9LE /* Power9 little-endian crypto implementations of AES */ extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); +extern void _gcry_aes_ppc9le_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); + extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_p10le_gcm_crypt 
(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_S390X_CRYPTO /* zSeries crypto implementations of AES */ extern int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx, unsigned int keylen, unsigned int hwfeatures, cipher_bulk_ops_t *bulk_ops); extern void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); #endif /*USE_S390X_CRYPTO*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks); static void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); static void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); static size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); static void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); /* All the numbers. */ #include "rijndael-tables.h" /* Function prototypes. */ static const char *selftest(void); static void prepare_decryption(RIJNDAEL_context *ctx); /* Prefetching for encryption/decryption tables. */ static inline void prefetch_table(const volatile byte *tab, size_t len) { size_t i; for (i = 0; len - i >= 8 * 32; i += 8 * 32) { (void)tab[i + 0 * 32]; (void)tab[i + 1 * 32]; (void)tab[i + 2 * 32]; (void)tab[i + 3 * 32]; (void)tab[i + 4 * 32]; (void)tab[i + 5 * 32]; (void)tab[i + 6 * 32]; (void)tab[i + 7 * 32]; } for (; i < len; i += 32) { (void)tab[i]; } (void)tab[len - 1]; } static void prefetch_enc(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ enc_tables.counter_head++; enc_tables.counter_tail++; /* Prefetch look-up tables to cache. */ prefetch_table((const void *)&enc_tables, sizeof(enc_tables)); } static void prefetch_dec(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ dec_tables.counter_head++; dec_tables.counter_tail++; /* Prefetch look-up tables to cache. 
*/ prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); } static inline u32 sbox4(u32 inb4) { u32 out; out = (encT[(inb4 >> 0) & 0xffU] & 0xff00U) >> 8; out |= (encT[(inb4 >> 8) & 0xffU] & 0xff00U) >> 0; out |= (encT[(inb4 >> 16) & 0xffU] & 0xff0000U) << 0; out |= (encT[(inb4 >> 24) & 0xffU] & 0xff0000U) << 8; return out; } /* Perform the key setup. */ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, cipher_bulk_ops_t *bulk_ops) { static int initialized = 0; static const char *selftest_failed = 0; void (*hw_setkey)(RIJNDAEL_context *ctx, const byte *key) = NULL; int rounds; unsigned int KC; unsigned int hwfeatures; /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a failed self-test won't get noticed in another thread. FIXME: We might want to have a central registry of succeeded self-tests. */ if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed ); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if( keylen == 128/8 ) { rounds = 10; KC = 4; } else if ( keylen == 192/8 ) { rounds = 12; KC = 6; } else if ( keylen == 256/8 ) { rounds = 14; KC = 8; } else return GPG_ERR_INV_KEYLEN; ctx->rounds = rounds; hwfeatures = _gcry_get_hw_features (); ctx->decryption_prepared = 0; /* Setup default bulk encryption routines. */ memset (bulk_ops, 0, sizeof(*bulk_ops)); bulk_ops->cfb_enc = _gcry_aes_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_xts_crypt; (void)hwfeatures; if (0) { ; } #ifdef USE_AESNI else if (hwfeatures & HWF_INTEL_AESNI) { hw_setkey = _gcry_aes_aesni_do_setkey; ctx->encrypt_fn = _gcry_aes_aesni_encrypt; ctx->decrypt_fn = _gcry_aes_aesni_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_aesni_prepare_decryption; ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX); ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2); /* Setup AES-NI bulk encryption routines. */ bulk_ops->cfb_enc = _gcry_aes_aesni_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_aesni_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_aesni_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_aesni_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_aesni_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_aesni_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_aesni_ecb_crypt; #ifdef USE_VAES if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) && (hwfeatures & HWF_INTEL_AVX2)) { /* Setup VAES bulk encryption routines. 
*/ bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec; bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_vaes_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt; bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_vaes_ecb_crypt; } #endif } #endif #ifdef USE_PADLOCK else if ((hwfeatures & HWF_PADLOCK_AES) && keylen == 128/8) { ctx->encrypt_fn = _gcry_aes_padlock_encrypt; ctx->decrypt_fn = _gcry_aes_padlock_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_padlock_prepare_decryption; memcpy (ctx->padlockkey, key, keylen); } #endif #ifdef USE_SSSE3 else if (hwfeatures & HWF_INTEL_SSSE3) { hw_setkey = _gcry_aes_ssse3_do_setkey; ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ssse3_prepare_decryption; /* Setup SSSE3 bulk encryption routines. */ bulk_ops->cfb_enc = _gcry_aes_ssse3_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ssse3_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ssse3_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ssse3_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ssse3_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ssse3_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ssse3_ocb_auth; } #endif #ifdef USE_ARM_CE else if (hwfeatures & HWF_ARM_AES) { hw_setkey = _gcry_aes_armv8_ce_setkey; ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_armv8_ce_prepare_decryption; /* Setup ARM-CE bulk encryption routines. */ bulk_ops->cfb_enc = _gcry_aes_armv8_ce_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_armv8_ce_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_armv8_ce_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_armv8_ce_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_armv8_ce_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_armv8_ce_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_armv8_ce_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_armv8_ce_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_armv8_ce_ecb_crypt; } #endif #ifdef USE_PPC_CRYPTO_WITH_PPC9LE else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00)) { hw_setkey = _gcry_aes_ppc8_setkey; ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt; ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC9LE bulk encryption routines. */ + bulk_ops->ecb_crypt = _gcry_aes_ppc9le_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc9le_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc9le_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc9le_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ppc9le_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ppc9le_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ppc9le_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ppc9le_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_ppc9le_xts_crypt; if (hwfeatures & HWF_PPC_ARCH_3_10) /* for P10 */ bulk_ops->gcm_crypt = _gcry_aes_p10le_gcm_crypt; # ifdef ENABLE_FORCE_SOFT_HWFEATURES /* HWF_PPC_ARCH_3_10 above is used as soft HW-feature indicator for P10. * Actual implementation works with HWF_PPC_ARCH_3_00 also. 
*/ if (hwfeatures & HWF_PPC_ARCH_3_00) bulk_ops->gcm_crypt = _gcry_aes_p10le_gcm_crypt; # endif } #endif #ifdef USE_PPC_CRYPTO else if (hwfeatures & HWF_PPC_VCRYPTO) { hw_setkey = _gcry_aes_ppc8_setkey; ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC8 bulk encryption routines. */ + bulk_ops->ecb_crypt = _gcry_aes_ppc8_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc8_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc8_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ppc8_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ppc8_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ppc8_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ppc8_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_ppc8_xts_crypt; } #endif #ifdef USE_S390X_CRYPTO else if (_gcry_aes_s390x_setup_acceleration (ctx, keylen, hwfeatures, bulk_ops)) { hw_setkey = _gcry_aes_s390x_setkey; ctx->encrypt_fn = _gcry_aes_s390x_encrypt; ctx->decrypt_fn = _gcry_aes_s390x_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_s390x_prepare_decryption; } #endif else { ctx->encrypt_fn = do_encrypt; ctx->decrypt_fn = do_decrypt; ctx->prefetch_enc_fn = prefetch_enc; ctx->prefetch_dec_fn = prefetch_dec; ctx->prepare_decryption = prepare_decryption; } /* NB: We don't yet support Padlock hardware key generation. */ if (hw_setkey) { hw_setkey (ctx, key); } else { u32 W_prev; u32 *W_u32 = ctx->keyschenc32b; byte rcon = 1; unsigned int i, j; prefetch_enc(); for (i = 0; i < KC; i += 2) { W_u32[i + 0] = buf_get_le32(key + i * 4 + 0); W_u32[i + 1] = buf_get_le32(key + i * 4 + 4); } for (i = KC, j = KC, W_prev = W_u32[KC - 1]; i < 4 * (rounds + 1); i += 2, j += 2) { u32 temp0 = W_prev; u32 temp1; if (j == KC) { j = 0; temp0 = sbox4(rol(temp0, 24)) ^ rcon; rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } else if (KC == 8 && j == 4) { temp0 = sbox4(temp0); } temp1 = W_u32[i - KC + 0]; W_u32[i + 0] = temp0 ^ temp1; W_u32[i + 1] = W_u32[i - KC + 1] ^ temp0 ^ temp1; W_prev = W_u32[i + 1]; } } return 0; } static gcry_err_code_t rijndael_setkey (void *context, const byte *key, const unsigned keylen, cipher_bulk_ops_t *bulk_ops) { RIJNDAEL_context *ctx = context; return do_setkey (ctx, key, keylen, bulk_ops); } /* Make a decryption key from an encryption key. 
*/ static void prepare_decryption( RIJNDAEL_context *ctx ) { const byte *sbox = ((const byte *)encT) + 1; int r; prefetch_enc(); prefetch_dec(); ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; for (r = 1; r < ctx->rounds; r++) { u32 *wi = ctx->keyschenc32[r]; u32 *wo = ctx->keyschdec32[r]; u32 wt; wt = wi[0]; wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[1]; wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[2]; wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[3]; wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); } ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Encrypt one block. A and B may be the same. */ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschenc32) const byte *sbox = ((const byte *)encT) + 1; int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[0][0]; sa[1] = sb[1] ^ rk[0][1]; sa[2] = sb[2] ^ rk[0][2]; sa[3] = sb[3] ^ rk[0][3]; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; for (r = 2; r < rounds; r++) { sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= 
rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r++; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } /* Last round is special. */ sb[0] = ((u32)sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8); sb[3] = ((u32)sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8); sb[2] = ((u32)sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8); sb[1] = ((u32)sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= ((u32)sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8); sb[3] ^= ((u32)sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8); sb[2] ^= ((u32)sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= ((u32)sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8); sa[1] ^= ((u32)sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8); sb[3] ^= ((u32)sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= ((u32)sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8); sa[2] ^= ((u32)sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8); sa[1] ^= ((u32)sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8); sa[3] = rk[r][3] ^ sb[3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56 + 2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, enc_tables.T); #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, enc_tables.T); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ } static unsigned int rijndael_encrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); return ctx->encrypt_fn (ctx, b, a); } /* Bulk encryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. 
This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. */ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. */ cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char *last_iv; unsigned int burn_depth = 0; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); last_iv = iv; for ( ;nblocks; nblocks-- ) { cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); last_iv = outbuf; inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } if (last_iv != iv) cipher_block_cpy (iv, last_iv, BLOCKSIZE); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CTR mode. Caller needs to make sure that CTR is aligned on a 16 byte boundary if AESNI; the minimum alignment is for an u32. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOCKSIZE. */ static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. */ cipher_block_add(ctr, 1, BLOCKSIZE); } wipememory(&tmp, sizeof(tmp)); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Decrypt one block. A and B may be the same. 
*/ static unsigned int do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschdec32) int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[rounds][0]; sa[1] = sb[1] ^ rk[rounds][1]; sa[2] = sb[2] ^ rk[rounds][2]; sa[3] = sb[3] ^ rk[rounds][3]; for (r = rounds - 1; r > 1; r--) { sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r--; sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; /* Last round is special. 
*/ sb[0] = (u32)inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8); sb[1] = (u32)inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8); sb[2] = (u32)inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8); sb[3] = (u32)inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8); sa[0] = sb[0] ^ rk[0][0]; sb[1] ^= (u32)inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8); sb[2] ^= (u32)inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8); sb[3] ^= (u32)inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8); sa[0] ^= (u32)inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8); sa[1] = sb[1] ^ rk[0][1]; sb[2] ^= (u32)inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8); sb[3] ^= (u32)inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8); sa[0] ^= (u32)inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8); sa[1] ^= (u32)inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8); sa[2] = sb[2] ^ rk[0][2]; sb[3] ^= (u32)inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8); sa[0] ^= (u32)inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8); sa[1] ^= (u32)inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8); sa[2] ^= (u32)inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8); sa[3] = sb[3] ^ rk[0][3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56+2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ /* Decrypt one block. AX and BX may be the same. */ static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, dec_tables.T); #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, dec_tables.T); #else return do_decrypt_fn (ctx, bx, ax); #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ } static inline void check_decryption_preparation (RIJNDAEL_context *ctx) { if ( !ctx->decryption_prepared ) { ctx->prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } } static unsigned int rijndael_decrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); return ctx->decrypt_fn (ctx, b, a); } /* Bulk decryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { burn_depth = encrypt_fn (ctx, iv, iv); cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk decryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. 
*/ static void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ burn_depth = decrypt_fn (ctx, savebuf, inbuf); cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption/decryption of complete blocks in OCB mode. */ static size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE); cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk authentication of complete blocks in OCB mode. 
*/ static size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned int burn_depth = 0; union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.aad_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); abuf += BLOCKSIZE; } wipememory(&l_tmp, sizeof(l_tmp)); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk encryption/decryption of complete blocks in XTS mode. */ static void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t crypt_fn; u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; if (encrypt) { if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); crypt_fn = ctx->encrypt_fn; } else { check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); crypt_fn = ctx->decrypt_fn; } tweak_next_lo = buf_get_le64 (tweak + 0); tweak_next_hi = buf_get_le64 (tweak + 8); while (nblocks) { tweak_lo = tweak_next_lo; tweak_hi = tweak_next_hi; /* Xor-Encrypt/Decrypt-Xor block. */ tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo; tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi; buf_put_le64 (outbuf + 0, tmp_lo); buf_put_le64 (outbuf + 8, tmp_hi); /* Generate next tweak. */ carry = -(tweak_next_hi >> 63) & 0x87; tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); tweak_next_lo = (tweak_next_lo << 1) ^ carry; burn_depth = crypt_fn (ctx, outbuf, outbuf); buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo); buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi); outbuf += GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; nblocks--; } buf_put_le64 (tweak + 0, tweak_next_lo); buf_put_le64 (tweak + 8, tweak_next_hi); if (burn_depth) _gcry_burn_stack (burn_depth + 5 * sizeof(void *)); } /* Run the self-tests for AES 128. Returns NULL on success. */ static const char* selftest_basic_128 (void) { RIJNDAEL_context *ctx; unsigned char ctxmem[sizeof(*ctx) + 16]; unsigned char scratch[16]; cipher_bulk_ops_t bulk_ops; /* The test vectors are from the AES supplied ones; more or less randomly taken from ecb_tbl.txt (I=42,81,14) */ #if 1 static const unsigned char plaintext_128[16] = { 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A }; static const unsigned char key_128[16] = { 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA }; static const unsigned char ciphertext_128[16] = { 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD }; #else /* Test vectors from fips-197, appendix C. 
*/ # warning debug test vectors in use static const unsigned char plaintext_128[16] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff }; static const unsigned char key_128[16] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; #endif ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15)); rijndael_setkey (ctx, key_128, sizeof (key_128), &bulk_ops); rijndael_encrypt (ctx, scratch, plaintext_128); if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) { return "AES-128 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; return NULL; } /* Run the self-tests for AES 192. Returns NULL on success. */ static const char* selftest_basic_192 (void) { RIJNDAEL_context *ctx; unsigned char ctxmem[sizeof(*ctx) + 16]; unsigned char scratch[16]; cipher_bulk_ops_t bulk_ops; static unsigned char plaintext_192[16] = { 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 }; static unsigned char key_192[24] = { 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 }; static const unsigned char ciphertext_192[16] = { 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA }; ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15)); rijndael_setkey (ctx, key_192, sizeof(key_192), &bulk_ops); rijndael_encrypt (ctx, scratch, plaintext_192); if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) { return "AES-192 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) return "AES-192 test decryption failed."; return NULL; } /* Run the self-tests for AES 256. Returns NULL on success. */ static const char* selftest_basic_256 (void) { RIJNDAEL_context *ctx; unsigned char ctxmem[sizeof(*ctx) + 16]; unsigned char scratch[16]; cipher_bulk_ops_t bulk_ops; static unsigned char plaintext_256[16] = { 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 }; static unsigned char key_256[32] = { 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E }; static const unsigned char ciphertext_256[16] = { 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 }; ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15)); rijndael_setkey (ctx, key_256, sizeof(key_256), &bulk_ops); rijndael_encrypt (ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) { return "AES-256 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) return "AES-256 test decryption failed."; return NULL; } /* Run all the self-tests and return NULL on success. This function is used for the on-the-fly self-tests. */ static const char * selftest (void) { const char *r; if ( (r = selftest_basic_128 ()) || (r = selftest_basic_192 ()) || (r = selftest_basic_256 ()) ) return r; return r; } /* SP800-38a.pdf for AES-128. 
*/ static const char * selftest_fips_128_38a (int requested_mode) { static const struct tv { int mode; const unsigned char key[16]; const unsigned char iv[16]; struct { const unsigned char input[16]; const unsigned char output[16]; } data[4]; } tv[2] = { { GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } } }, { GCRY_CIPHER_MODE_OFB, { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, } } }; unsigned char scratch[16]; gpg_error_t err; int tvi, idx; gcry_cipher_hd_t hdenc = NULL; gcry_cipher_hd_t hddec = NULL; #define Fail(a) do { \ _gcry_cipher_close (hdenc); \ _gcry_cipher_close (hddec); \ return a; \ } while (0) gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); for (tvi=0; tvi < DIM (tv); tvi++) if (tv[tvi].mode == requested_mode) break; if (tvi == DIM (tv)) Fail ("no test data for this mode"); err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); if (!err) err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); if (err) Fail ("set key"); err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); if (!err) err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); if (err) Fail ("set IV"); for (idx=0; idx < DIM (tv[tvi].data); idx++) { err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, tv[tvi].data[idx].input, sizeof tv[tvi].data[idx].input); if 
(err) Fail ("encrypt command"); if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) Fail ("encrypt mismatch"); err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, tv[tvi].data[idx].output, sizeof tv[tvi].data[idx].output); if (err) Fail ("decrypt command"); if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) Fail ("decrypt mismatch"); } #undef Fail _gcry_cipher_close (hdenc); _gcry_cipher_close (hddec); return NULL; } /* Complete selftest for AES-128 with all modes and driver code. */ static gpg_err_code_t selftest_fips_128 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "low-level"; errtxt = selftest_basic_128 (); if (errtxt) goto failed; if (extended) { what = "cfb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); if (errtxt) goto failed; what = "ofb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES128, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-192. */ static gpg_err_code_t selftest_fips_192 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_192 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES192, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-256. */ static gpg_err_code_t selftest_fips_256 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_256 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES256, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. 
*/ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_AES128: ec = selftest_fips_128 (extended, report); break; case GCRY_CIPHER_AES192: ec = selftest_fips_192 (extended, report); break; case GCRY_CIPHER_AES256: ec = selftest_fips_256 (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } static const char *rijndael_names[] = { "RIJNDAEL", "AES128", "AES-128", NULL }; static const gcry_cipher_oid_spec_t rijndael_oids[] = { { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.6", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.7", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes = { GCRY_CIPHER_AES, {0, 1}, "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael192_names[] = { "RIJNDAEL192", "AES-192", NULL }; static const gcry_cipher_oid_spec_t rijndael192_oids[] = { { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.26", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.27", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes192 = { GCRY_CIPHER_AES192, {0, 1}, "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael256_names[] = { "RIJNDAEL256", "AES-256", NULL }; static const gcry_cipher_oid_spec_t rijndael256_oids[] = { { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.46", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.47", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes256 = { GCRY_CIPHER_AES256, {0, 1}, "AES256", rijndael256_names, rijndael256_oids, 16, 256, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests };
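Note: the tweak update inside _gcry_aes_xts_crypt above multiplies the 128-bit
tweak by x in GF(2^128), folding the reduction polynomial term 0x87 back into
the low half whenever the top bit shifts out.  A minimal standalone sketch of
that step, using a hypothetical helper name and the tweak split into two
little-endian 64-bit halves, might look like this:

#include <stdint.h>

/* Multiply the 128-bit XTS tweak (two little-endian 64-bit halves) by x in
 * GF(2^128).  Mirrors the carry logic used in _gcry_aes_xts_crypt. */
static void
xts_tweak_double (uint64_t *lo, uint64_t *hi)
{
  uint64_t carry = -(*hi >> 63) & 0x87;   /* 0x87 iff the MSB was set */

  *hi = (*hi << 1) | (*lo >> 63);         /* shift the 128-bit value left */
  *lo = (*lo << 1) ^ carry;               /* fold the reduction term in */
}

Starting from the encrypted initial tweak, successive calls yield the
per-block tweaks T, x*T, x^2*T, ... consumed by the xor-encrypt-xor step.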