diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h index c9ecaba6..b1b4b2e1 100644 --- a/cipher/bulkhelp.h +++ b/cipher/bulkhelp.h @@ -1,328 +1,396 @@ /* bulkhelp.h - Some bulk processing helpers * Copyright (C) 2022 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef GCRYPT_BULKHELP_H #define GCRYPT_BULKHELP_H #include "g10lib.h" #include "cipher-internal.h" #ifdef __x86_64__ /* Use u64 to store pointers for x32 support (assembly function assumes * 64-bit pointers). */ typedef u64 ocb_L_uintptr_t; #else typedef uintptr_t ocb_L_uintptr_t; #endif typedef unsigned int (*bulk_crypt_fn_t) (const void *ctx, byte *out, const byte *in, unsigned int num_blks); static inline ocb_L_uintptr_t * bulk_ocb_prepare_L_pointers_array_blk32 (gcry_cipher_hd_t c, ocb_L_uintptr_t Ls[32], u64 blkn) { unsigned int n = 32 - (blkn % 32); unsigned int i; for (i = 0; i < 32; i += 8) { Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; return &Ls[(31 + n) % 32]; } static inline ocb_L_uintptr_t * bulk_ocb_prepare_L_pointers_array_blk16 (gcry_cipher_hd_t c, ocb_L_uintptr_t Ls[16], u64 blkn) { unsigned int n = 16 - (blkn % 16); unsigned int i; for (i = 0; i < 16; i += 8) { Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; return &Ls[(15 + n) % 16]; } static inline ocb_L_uintptr_t * bulk_ocb_prepare_L_pointers_array_blk8 (gcry_cipher_hd_t c, ocb_L_uintptr_t Ls[8], u64 blkn) { unsigned int n = 8 - (blkn % 8); Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; return &Ls[(7 + n) % 8]; } static inline unsigned int bulk_ctr_enc_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, const byte *inbuf, size_t nblocks, byte *ctr, byte *tmpbuf, size_t tmpbuf_nblocks, unsigned int *num_used_tmpblocks) { unsigned int tmp_used = 16; unsigned int burn_depth = 0; unsigned int nburn; while (nblocks >= 1) { size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; size_t i; if (curr_blks * 16 > tmp_used) tmp_used = curr_blks * 16; cipher_block_cpy (tmpbuf + 0 * 16, ctr, 16); for (i = 1; i < curr_blks; i++) { cipher_block_cpy (&tmpbuf[i * 16], ctr, 16); cipher_block_add (&tmpbuf[i * 16], i, 16); } cipher_block_add (ctr, curr_blks, 16); nburn = crypt_fn (priv, tmpbuf, tmpbuf, curr_blks); burn_depth = nburn > burn_depth ? nburn : burn_depth; for (i = 0; i < curr_blks; i++) { cipher_block_xor (outbuf, &tmpbuf[i * 16], inbuf, 16); outbuf += 16; inbuf += 16; } nblocks -= curr_blks; } *num_used_tmpblocks = tmp_used; return burn_depth; } static inline unsigned int bulk_cbc_dec_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, const byte *inbuf, size_t nblocks, byte *iv, byte *tmpbuf, size_t tmpbuf_nblocks, unsigned int *num_used_tmpblocks) { unsigned int tmp_used = 16; unsigned int burn_depth = 0; unsigned int nburn; while (nblocks >= 1) { size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; size_t i; if (curr_blks * 16 > tmp_used) tmp_used = curr_blks * 16; nburn = crypt_fn (priv, tmpbuf, inbuf, curr_blks); burn_depth = nburn > burn_depth ? nburn : burn_depth; for (i = 0; i < curr_blks; i++) { cipher_block_xor_n_copy_2(outbuf, &tmpbuf[i * 16], iv, inbuf, 16); outbuf += 16; inbuf += 16; } nblocks -= curr_blks; } *num_used_tmpblocks = tmp_used; return burn_depth; } static inline unsigned int bulk_cfb_dec_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, const byte *inbuf, size_t nblocks, byte *iv, byte *tmpbuf, size_t tmpbuf_nblocks, unsigned int *num_used_tmpblocks) { unsigned int tmp_used = 16; unsigned int burn_depth = 0; unsigned int nburn; while (nblocks >= 1) { size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; size_t i; if (curr_blks * 16 > tmp_used) tmp_used = curr_blks * 16; cipher_block_cpy (&tmpbuf[0 * 16], iv, 16); if (curr_blks > 1) memcpy (&tmpbuf[1 * 16], &inbuf[(1 - 1) * 16], 16 * curr_blks - 16); cipher_block_cpy (iv, &inbuf[(curr_blks - 1) * 16], 16); nburn = crypt_fn (priv, tmpbuf, tmpbuf, curr_blks); burn_depth = nburn > burn_depth ? nburn : burn_depth; for (i = 0; i < curr_blks; i++) { cipher_block_xor (outbuf, inbuf, &tmpbuf[i * 16], 16); outbuf += 16; inbuf += 16; } nblocks -= curr_blks; } *num_used_tmpblocks = tmp_used; return burn_depth; } static inline unsigned int bulk_ocb_crypt_128 (gcry_cipher_hd_t c, void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, const byte *inbuf, size_t nblocks, u64 *blkn, int encrypt, byte *tmpbuf, size_t tmpbuf_nblocks, unsigned int *num_used_tmpblocks) { unsigned int tmp_used = 16; unsigned int burn_depth = 0; unsigned int nburn; while (nblocks >= 1) { size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; size_t i; if (curr_blks * 16 > tmp_used) tmp_used = curr_blks * 16; for (i = 0; i < curr_blks; i++) { const unsigned char *l = ocb_get_l(c, ++*blkn); /* Checksum_i = Checksum_{i-1} xor P_i */ if (encrypt) cipher_block_xor_1(c->u_ctr.ctr, &inbuf[i * 16], 16); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_2dst (&tmpbuf[i * 16], c->u_iv.iv, l, 16); cipher_block_xor (&outbuf[i * 16], &inbuf[i * 16], c->u_iv.iv, 16); } /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ nburn = crypt_fn (priv, outbuf, outbuf, curr_blks); burn_depth = nburn > burn_depth ? nburn : burn_depth; for (i = 0; i < curr_blks; i++) { cipher_block_xor_1 (&outbuf[i * 16], &tmpbuf[i * 16], 16); /* Checksum_i = Checksum_{i-1} xor P_i */ if (!encrypt) cipher_block_xor_1(c->u_ctr.ctr, &outbuf[i * 16], 16); } outbuf += curr_blks * 16; inbuf += curr_blks * 16; nblocks -= curr_blks; } *num_used_tmpblocks = tmp_used; return burn_depth; } static inline unsigned int bulk_ocb_auth_128 (gcry_cipher_hd_t c, void *priv, bulk_crypt_fn_t crypt_fn, const byte *abuf, size_t nblocks, u64 *blkn, byte *tmpbuf, size_t tmpbuf_nblocks, unsigned int *num_used_tmpblocks) { unsigned int tmp_used = 16; unsigned int burn_depth = 0; unsigned int nburn; while (nblocks >= 1) { size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; size_t i; if (curr_blks * 16 > tmp_used) tmp_used = curr_blks * 16; for (i = 0; i < curr_blks; i++) { const unsigned char *l = ocb_get_l(c, ++*blkn); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ cipher_block_xor_2dst (&tmpbuf[i * 16], c->u_mode.ocb.aad_offset, l, 16); cipher_block_xor_1 (&tmpbuf[i * 16], &abuf[i * 16], 16); } /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ nburn = crypt_fn (priv, tmpbuf, tmpbuf, curr_blks); burn_depth = nburn > burn_depth ? nburn : burn_depth; for (i = 0; i < curr_blks; i++) { cipher_block_xor_1 (c->u_mode.ocb.aad_sum, &tmpbuf[i * 16], 16); } abuf += curr_blks * 16; nblocks -= curr_blks; } *num_used_tmpblocks = tmp_used; return burn_depth; } +static inline unsigned int +bulk_xts_crypt_128 (void *priv, bulk_crypt_fn_t crypt_fn, byte *outbuf, + const byte *inbuf, size_t nblocks, byte *tweak, + byte *tmpbuf, size_t tmpbuf_nblocks, + unsigned int *num_used_tmpblocks) +{ + u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; + unsigned int tmp_used = 16; + unsigned int burn_depth = 0; + unsigned int nburn; + + tweak_next_lo = buf_get_le64 (tweak + 0); + tweak_next_hi = buf_get_le64 (tweak + 8); + + while (nblocks >= 1) + { + size_t curr_blks = nblocks > tmpbuf_nblocks ? tmpbuf_nblocks : nblocks; + size_t i; + + if (curr_blks * 16 > tmp_used) + tmp_used = curr_blks * 16; + + for (i = 0; i < curr_blks; i++) + { + tweak_lo = tweak_next_lo; + tweak_hi = tweak_next_hi; + + /* Generate next tweak. */ + carry = -(tweak_next_hi >> 63) & 0x87; + tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); + tweak_next_lo = (tweak_next_lo << 1) ^ carry; + + /* Xor-Encrypt/Decrypt-Xor block. */ + tmp_lo = buf_get_le64 (inbuf + i * 16 + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (inbuf + i * 16 + 8) ^ tweak_hi; + buf_put_he64 (&tmpbuf[i * 16 + 0], tweak_lo); + buf_put_he64 (&tmpbuf[i * 16 + 8], tweak_hi); + buf_put_le64 (outbuf + i * 16 + 0, tmp_lo); + buf_put_le64 (outbuf + i * 16 + 8, tmp_hi); + } + + nburn = crypt_fn (priv, outbuf, outbuf, curr_blks); + burn_depth = nburn > burn_depth ? nburn : burn_depth; + + for (i = 0; i < curr_blks; i++) + { + /* Xor-Encrypt/Decrypt-Xor block. */ + tweak_lo = buf_get_he64 (&tmpbuf[i * 16 + 0]); + tweak_hi = buf_get_he64 (&tmpbuf[i * 16 + 8]); + tmp_lo = buf_get_le64 (outbuf + i * 16 + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (outbuf + i * 16 + 8) ^ tweak_hi; + buf_put_le64 (outbuf + i * 16 + 0, tmp_lo); + buf_put_le64 (outbuf + i * 16 + 8, tmp_hi); + } + + inbuf += curr_blks * 16; + outbuf += curr_blks * 16; + nblocks -= curr_blks; + } + + buf_put_le64 (tweak + 0, tweak_next_lo); + buf_put_le64 (tweak + 8, tweak_next_hi); + + *num_used_tmpblocks = tmp_used; + return burn_depth; +} + + #endif /*GCRYPT_BULKHELP_H*/ diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 741a9ce7..00e23750 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -1,1356 +1,1395 @@ /* camellia-glue.c - Glue for the Camellia cipher * Copyright (C) 2007 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ /* I put all the libgcrypt-specific stuff in this file to keep the camellia.c/camellia.h files exactly as provided by NTT. If they update their code, this should make it easier to bring the changes in. - dshaw There is one small change which needs to be done: Include the following code at the top of camellia.h: */ #if 0 /* To use Camellia with libraries it is often useful to keep the name * space of the library clean. The following macro is thus useful: * * #define CAMELLIA_EXT_SYM_PREFIX foo_ * * This prefixes all external symbols with "foo_". */ #ifdef HAVE_CONFIG_H #include #endif #ifdef CAMELLIA_EXT_SYM_PREFIX #define CAMELLIA_PREFIX1(x,y) x ## y #define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y) #define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x) #define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen) #define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock) #define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock) #define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128) #define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256) #define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128) #define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256) #define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128) #define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192) #define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256) #endif /*CAMELLIA_EXT_SYM_PREFIX*/ #endif /* Code sample. */ #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "camellia.h" #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" #include "bulkhelp.h" /* Helper macro to force alignment to 16 bytes. */ #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED # define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) #else # define ATTR_ALIGNED_16 #endif /* USE_AESNI inidicates whether to compile with Intel AES-NI/AVX code. */ #undef USE_AESNI_AVX #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AESNI_AVX 1 # endif #endif /* USE_AESNI_AVX2 inidicates whether to compile with Intel AES-NI/AVX2 code. */ #undef USE_AESNI_AVX2 #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) # if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AESNI_AVX2 1 # endif #endif /* USE_VAES_AVX2 inidicates whether to compile with Intel VAES/AVX2 code. */ #undef USE_VAES_AVX2 #if defined(USE_AESNI_AVX2) && defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) # define USE_VAES_AVX2 1 #endif /* USE_GFNI_AVX2 inidicates whether to compile with Intel GFNI/AVX2 code. */ #undef USE_GFNI_AVX2 #if defined(USE_AESNI_AVX2) && defined(ENABLE_GFNI_SUPPORT) # define USE_GFNI_AVX2 1 #endif typedef struct { KEY_TABLE_TYPE keytable; int keybitlength; #ifdef USE_AESNI_AVX unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used. */ #endif /*USE_AESNI_AVX*/ #ifdef USE_AESNI_AVX2 unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used. */ unsigned int use_vaes_avx2:1; /* VAES/AVX2 implementation shall be used. */ unsigned int use_gfni_avx2:1; /* GFNI/AVX2 implementation shall be used. */ #endif /*USE_AESNI_AVX2*/ } CAMELLIA_context; /* Assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. */ #undef ASM_FUNC_ABI #undef ASM_EXTRA_STACK #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # define ASM_EXTRA_STACK (10 * 16) # else # define ASM_FUNC_ABI # define ASM_EXTRA_STACK 0 # endif #endif #ifdef USE_AESNI_AVX /* Assembler implementations of Camellia using AES-NI and AVX. Process data in 16 block same time. */ extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx, const unsigned char *key, unsigned int keylen) ASM_FUNC_ABI; static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 + 2 * sizeof(void *) + ASM_EXTRA_STACK; #endif #ifdef USE_AESNI_AVX2 /* Assembler implementations of Camellia using AES-NI and AVX2. Process data in 32 block same time. */ extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_enc_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; extern void _gcry_camellia_aesni_avx2_dec_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; static const int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + 2 * sizeof(void *) + ASM_EXTRA_STACK; #endif #ifdef USE_VAES_AVX2 /* Assembler implementations of Camellia using VAES and AVX2. Process data in 32 block same time. */ extern void _gcry_camellia_vaes_avx2_ctr_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_cbc_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_cfb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_ocb_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_ocb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_ocb_auth(CAMELLIA_context *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_enc_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; extern void _gcry_camellia_vaes_avx2_dec_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; #endif #ifdef USE_GFNI_AVX2 /* Assembler implementations of Camellia using GFNI and AVX2. Process data in 32 block same time. */ extern void _gcry_camellia_gfni_avx2_ctr_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_cbc_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_cfb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_ocb_enc(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_ocb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_ocb_auth(CAMELLIA_context *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[32]) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_enc_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; extern void _gcry_camellia_gfni_avx2_dec_blk1_32(const CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned int nblocks) ASM_FUNC_ABI; #endif static const char *selftest(void); static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_camellia_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_camellia_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); +static void _gcry_camellia_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); static gcry_err_code_t camellia_setkey(void *c, const byte *key, unsigned keylen, cipher_bulk_ops_t *bulk_ops) { CAMELLIA_context *ctx=c; static int initialized=0; static const char *selftest_failed=NULL; #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) \ || defined(USE_VAES_AVX2) || defined(USE_GFNI_AVX2) unsigned int hwf = _gcry_get_hw_features (); #endif if(keylen!=16 && keylen!=24 && keylen!=32) return GPG_ERR_INV_KEYLEN; if(!initialized) { initialized=1; selftest_failed=selftest(); if(selftest_failed) log_error("%s\n",selftest_failed); } if(selftest_failed) return GPG_ERR_SELFTEST_FAILED; #ifdef USE_AESNI_AVX ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX); #endif #ifdef USE_AESNI_AVX2 ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2); ctx->use_vaes_avx2 = 0; ctx->use_gfni_avx2 = 0; #endif #ifdef USE_VAES_AVX2 ctx->use_vaes_avx2 = (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & HWF_INTEL_AVX2); #endif #ifdef USE_GFNI_AVX2 ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2); #endif ctx->keybitlength=keylen*8; /* Setup bulk encryption routines. */ memset (bulk_ops, 0, sizeof(*bulk_ops)); bulk_ops->cbc_dec = _gcry_camellia_cbc_dec; bulk_ops->cfb_dec = _gcry_camellia_cfb_dec; bulk_ops->ctr_enc = _gcry_camellia_ctr_enc; bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt; bulk_ops->ocb_auth = _gcry_camellia_ocb_auth; +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2) + bulk_ops->xts_crypt = _gcry_camellia_xts_crypt; +#endif if (0) { } #ifdef USE_AESNI_AVX else if (ctx->use_aesni_avx) _gcry_camellia_aesni_avx_keygen(ctx, key, keylen); else #endif { Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable); _gcry_burn_stack ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */ +(4+32)*sizeof(u32)+2*sizeof(void*) /* camellia_setup192 */ +0+sizeof(int)+2*sizeof(void*) /* Camellia_Ekeygen */ +3*2*sizeof(void*) /* Function calls. */ ); } #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) { /* Disable AESNI & VAES implementations when GFNI implementation is * enabled. */ #ifdef USE_AESNI_AVX ctx->use_aesni_avx = 0; #endif #ifdef USE_AESNI_AVX2 ctx->use_aesni_avx2 = 0; #endif #ifdef USE_VAES_AVX2 ctx->use_vaes_avx2 = 0; #endif } #endif return 0; } #ifdef USE_ARM_ASM /* Assembly implementations of Camellia. */ extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable, byte *outbuf, const byte *inbuf, const int keybits); extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable, byte *outbuf, const byte *inbuf, const int keybits); static void Camellia_EncryptBlock(const int keyBitLength, const unsigned char *plaintext, const KEY_TABLE_TYPE keyTable, unsigned char *cipherText) { _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext, keyBitLength); } static void Camellia_DecryptBlock(const int keyBitLength, const unsigned char *cipherText, const KEY_TABLE_TYPE keyTable, unsigned char *plaintext) { _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText, keyBitLength); } #ifdef __aarch64__ # define CAMELLIA_encrypt_stack_burn_size (0) # define CAMELLIA_decrypt_stack_burn_size (0) #else # define CAMELLIA_encrypt_stack_burn_size (15*4) # define CAMELLIA_decrypt_stack_burn_size (15*4) #endif static unsigned int camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) { CAMELLIA_context *ctx = c; Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); } static unsigned int camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) { CAMELLIA_context *ctx=c; Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); } #else /*USE_ARM_ASM*/ static unsigned int camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) { CAMELLIA_context *ctx=c; Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); #define CAMELLIA_encrypt_stack_burn_size \ (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ +4*sizeof(u32)+4*sizeof(u32) \ +2*sizeof(u32*)+4*sizeof(u32) \ +2*2*sizeof(void*) /* Function calls. */ \ ) return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); } static unsigned int camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) { CAMELLIA_context *ctx=c; Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); #define CAMELLIA_decrypt_stack_burn_size \ (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ +4*sizeof(u32)+4*sizeof(u32) \ +2*sizeof(u32*)+4*sizeof(u32) \ +2*2*sizeof(void*) /* Function calls. */ \ ) return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); } #endif /*!USE_ARM_ASM*/ static unsigned int camellia_encrypt_blk1_32 (const void *priv, byte *outbuf, const byte *inbuf, unsigned int num_blks) { const CAMELLIA_context *ctx = priv; unsigned int stack_burn_size = 0; gcry_assert (num_blks <= 32); #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2 && num_blks >= 3) { /* 3 or more parallel block GFNI processing is faster than * generic C implementation. */ _gcry_camellia_gfni_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2 && num_blks >= 6) { /* 6 or more parallel block VAES processing is faster than * generic C implementation. */ _gcry_camellia_vaes_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2 && num_blks >= 6) { /* 6 or more parallel block AESNI processing is faster than * generic C implementation. */ _gcry_camellia_aesni_avx2_enc_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif while (num_blks) { stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf); outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; num_blks--; } return stack_burn_size; } static unsigned int camellia_decrypt_blk1_32 (const void *priv, byte *outbuf, const byte *inbuf, unsigned int num_blks) { const CAMELLIA_context *ctx = priv; unsigned int stack_burn_size = 0; gcry_assert (num_blks <= 32); #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2 && num_blks >= 3) { /* 3 or more parallel block GFNI processing is faster than * generic C implementation. */ _gcry_camellia_gfni_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2 && num_blks >= 6) { /* 6 or more parallel block VAES processing is faster than * generic C implementation. */ _gcry_camellia_vaes_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2 && num_blks >= 6) { /* 6 or more parallel block AESNI processing is faster than * generic C implementation. */ _gcry_camellia_aesni_avx2_dec_blk1_32 (ctx, outbuf, inbuf, num_blks); return avx2_burn_stack_depth; } #endif while (num_blks) { stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf); outbuf += CAMELLIA_BLOCK_SIZE; inbuf += CAMELLIA_BLOCK_SIZE; num_blks--; } return stack_burn_size; } /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size CAMELLIA_BLOCK_SIZE. */ static void _gcry_camellia_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAMELLIA_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 0; #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) { int did_use_aesni_avx2 = 0; typeof (&_gcry_camellia_aesni_avx2_ctr_enc) bulk_ctr_fn = _gcry_camellia_aesni_avx2_ctr_enc; #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2) bulk_ctr_fn =_gcry_camellia_vaes_avx2_ctr_enc; #endif #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) bulk_ctr_fn =_gcry_camellia_gfni_avx2_ctr_enc; #endif /* Process data in 32 block chunks. */ while (nblocks >= 32) { bulk_ctr_fn (ctx, outbuf, inbuf, ctr); nblocks -= 32; outbuf += 32 * CAMELLIA_BLOCK_SIZE; inbuf += 32 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx2 = 1; } if (did_use_aesni_avx2) { if (burn_stack_depth < avx2_burn_stack_depth) burn_stack_depth = avx2_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif #ifdef USE_AESNI_AVX if (ctx->use_aesni_avx) { int did_use_aesni_avx = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 16; outbuf += 16 * CAMELLIA_BLOCK_SIZE; inbuf += 16 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx = 1; } if (did_use_aesni_avx) { if (burn_stack_depth < avx_burn_stack_depth) burn_stack_depth = avx_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif /* Process remaining blocks. */ if (nblocks) { byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; size_t nburn; nburn = bulk_ctr_enc_128(ctx, camellia_encrypt_blk1_32, outbuf, inbuf, nblocks, ctr, tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used); burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; wipememory(tmpbuf, tmp_used); } if (burn_stack_depth) _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_camellia_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAMELLIA_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 0; #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) { int did_use_aesni_avx2 = 0; typeof (&_gcry_camellia_aesni_avx2_cbc_dec) bulk_cbc_fn = _gcry_camellia_aesni_avx2_cbc_dec; #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2) bulk_cbc_fn =_gcry_camellia_vaes_avx2_cbc_dec; #endif #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) bulk_cbc_fn =_gcry_camellia_gfni_avx2_cbc_dec; #endif /* Process data in 32 block chunks. */ while (nblocks >= 32) { bulk_cbc_fn (ctx, outbuf, inbuf, iv); nblocks -= 32; outbuf += 32 * CAMELLIA_BLOCK_SIZE; inbuf += 32 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx2 = 1; } if (did_use_aesni_avx2) { if (burn_stack_depth < avx2_burn_stack_depth) burn_stack_depth = avx2_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_AESNI_AVX if (ctx->use_aesni_avx) { int did_use_aesni_avx = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * CAMELLIA_BLOCK_SIZE; inbuf += 16 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx = 1; } if (did_use_aesni_avx) { if (burn_stack_depth < avx_burn_stack_depth) burn_stack_depth = avx_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif /* Process remaining blocks. */ if (nblocks) { byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; size_t nburn; nburn = bulk_cbc_dec_128(ctx, camellia_decrypt_blk1_32, outbuf, inbuf, nblocks, iv, tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used); burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; wipememory(tmpbuf, tmp_used); } if (burn_stack_depth) _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CFB mode. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_camellia_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { CAMELLIA_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 0; #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) { int did_use_aesni_avx2 = 0; typeof (&_gcry_camellia_aesni_avx2_cfb_dec) bulk_cfb_fn = _gcry_camellia_aesni_avx2_cfb_dec; #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2) bulk_cfb_fn =_gcry_camellia_vaes_avx2_cfb_dec; #endif #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) bulk_cfb_fn =_gcry_camellia_gfni_avx2_cfb_dec; #endif /* Process data in 32 block chunks. */ while (nblocks >= 32) { bulk_cfb_fn (ctx, outbuf, inbuf, iv); nblocks -= 32; outbuf += 32 * CAMELLIA_BLOCK_SIZE; inbuf += 32 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx2 = 1; } if (did_use_aesni_avx2) { if (burn_stack_depth < avx2_burn_stack_depth) burn_stack_depth = avx2_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_AESNI_AVX if (ctx->use_aesni_avx) { int did_use_aesni_avx = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * CAMELLIA_BLOCK_SIZE; inbuf += 16 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx = 1; } if (did_use_aesni_avx) { if (burn_stack_depth < avx_burn_stack_depth) burn_stack_depth = avx_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif /* Process remaining blocks. */ if (nblocks) { byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; size_t nburn; nburn = bulk_cfb_dec_128(ctx, camellia_encrypt_blk1_32, outbuf, inbuf, nblocks, iv, tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used); burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; wipememory(tmpbuf, tmp_used); } if (burn_stack_depth) _gcry_burn_stack(burn_stack_depth); } +/* Bulk encryption/decryption of complete blocks in XTS mode. */ +static void +_gcry_camellia_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 0; + + /* Process remaining blocks. */ + if (nblocks) + { + byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; + unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; + size_t nburn; + + nburn = bulk_xts_crypt_128(ctx, encrypt ? camellia_encrypt_blk1_32 + : camellia_decrypt_blk1_32, + outbuf, inbuf, nblocks, tweak, tmpbuf, + sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, + &tmp_used); + burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; + + wipememory(tmpbuf, tmp_used); + } + + if (burn_stack_depth) + _gcry_burn_stack(burn_stack_depth); +} + /* Bulk encryption/decryption of complete blocks in OCB mode. */ static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) CAMELLIA_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.data_nblocks; #else (void)c; (void)outbuf_arg; (void)inbuf_arg; (void)encrypt; #endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) { int did_use_aesni_avx2 = 0; u64 Ls[32]; u64 *l; if (nblocks >= 32) { typeof (&_gcry_camellia_aesni_avx2_ocb_dec) bulk_ocb_fn = encrypt ? _gcry_camellia_aesni_avx2_ocb_enc : _gcry_camellia_aesni_avx2_ocb_dec; #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2) bulk_ocb_fn = encrypt ? _gcry_camellia_vaes_avx2_ocb_enc : _gcry_camellia_vaes_avx2_ocb_dec; #endif #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) bulk_ocb_fn = encrypt ? _gcry_camellia_gfni_avx2_ocb_enc : _gcry_camellia_gfni_avx2_ocb_dec; #endif l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); /* Process data in 32 block chunks. */ while (nblocks >= 32) { blkn += 32; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); bulk_ocb_fn (ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 32; outbuf += 32 * CAMELLIA_BLOCK_SIZE; inbuf += 32 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx2 = 1; } } if (did_use_aesni_avx2) { if (burn_stack_depth < avx2_burn_stack_depth) burn_stack_depth = avx2_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_AESNI_AVX if (ctx->use_aesni_avx) { int did_use_aesni_avx = 0; u64 Ls[16]; u64 *l; if (nblocks >= 16) { l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); if (encrypt) _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 16; outbuf += 16 * CAMELLIA_BLOCK_SIZE; inbuf += 16 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx = 1; } } if (did_use_aesni_avx) { if (burn_stack_depth < avx_burn_stack_depth) burn_stack_depth = avx_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) /* Process remaining blocks. */ if (nblocks) { byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; size_t nburn; nburn = bulk_ocb_crypt_128 (c, ctx, encrypt ? camellia_encrypt_blk1_32 : camellia_decrypt_blk1_32, outbuf, inbuf, nblocks, &blkn, encrypt, tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used); burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; wipememory(tmpbuf, tmp_used); nblocks = 0; } c->u_mode.ocb.data_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #endif return nblocks; } /* Bulk authentication of complete blocks in OCB mode. */ static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) CAMELLIA_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; int burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.aad_nblocks; #else (void)c; (void)abuf_arg; #endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) { int did_use_aesni_avx2 = 0; u64 Ls[32]; u64 *l; if (nblocks >= 32) { typeof (&_gcry_camellia_aesni_avx2_ocb_auth) bulk_auth_fn = _gcry_camellia_aesni_avx2_ocb_auth; #ifdef USE_VAES_AVX2 if (ctx->use_vaes_avx2) bulk_auth_fn = _gcry_camellia_vaes_avx2_ocb_auth; #endif #ifdef USE_GFNI_AVX2 if (ctx->use_gfni_avx2) bulk_auth_fn = _gcry_camellia_gfni_avx2_ocb_auth; #endif l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); /* Process data in 32 block chunks. */ while (nblocks >= 32) { blkn += 32; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); bulk_auth_fn (ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 32; abuf += 32 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx2 = 1; } } if (did_use_aesni_avx2) { if (burn_stack_depth < avx2_burn_stack_depth) burn_stack_depth = avx2_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_AESNI_AVX if (ctx->use_aesni_avx) { int did_use_aesni_avx = 0; u64 Ls[16]; u64 *l; if (nblocks >= 16) { l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 16; abuf += 16 * CAMELLIA_BLOCK_SIZE; did_use_aesni_avx = 1; } } if (did_use_aesni_avx) { if (burn_stack_depth < avx_burn_stack_depth) burn_stack_depth = avx_burn_stack_depth; } /* Use generic code to handle smaller chunks... */ } #endif #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) /* Process remaining blocks. */ if (nblocks) { byte tmpbuf[CAMELLIA_BLOCK_SIZE * 32]; unsigned int tmp_used = CAMELLIA_BLOCK_SIZE; size_t nburn; nburn = bulk_ocb_auth_128 (c, ctx, camellia_encrypt_blk1_32, abuf, nblocks, &blkn, tmpbuf, sizeof(tmpbuf) / CAMELLIA_BLOCK_SIZE, &tmp_used); burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth; wipememory(tmpbuf, tmp_used); nblocks = 0; } c->u_mode.ocb.aad_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #endif return nblocks; } /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char* selftest_ctr_128 (void) { const int nblocks = 32+16+1; const int blocksize = CAMELLIA_BLOCK_SIZE; const int context_size = sizeof(CAMELLIA_context); return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey, &camellia_encrypt, nblocks, blocksize, context_size); } /* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cbc_128 (void) { const int nblocks = 32+16+2; const int blocksize = CAMELLIA_BLOCK_SIZE; const int context_size = sizeof(CAMELLIA_context); return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey, &camellia_encrypt, nblocks, blocksize, context_size); } /* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption. Returns NULL on success. */ static const char* selftest_cfb_128 (void) { const int nblocks = 32+16+2; const int blocksize = CAMELLIA_BLOCK_SIZE; const int context_size = sizeof(CAMELLIA_context); return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey, &camellia_encrypt, nblocks, blocksize, context_size); } static const char * selftest(void) { CAMELLIA_context ctx; byte scratch[16]; cipher_bulk_ops_t bulk_ops; const char *r; /* These test vectors are from RFC-3713 */ static const byte plaintext[]= { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 }; static const byte key_128[]= { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef, 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 }; static const byte ciphertext_128[]= { 0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73, 0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43 }; static const byte key_192[]= { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98, 0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77 }; static const byte ciphertext_192[]= { 0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8, 0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9 }; static const byte key_256[]= { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba, 0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55, 0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff }; static const byte ciphertext_256[]= { 0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c, 0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09 }; camellia_setkey(&ctx,key_128,sizeof(key_128),&bulk_ops); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0) return "CAMELLIA-128 test encryption failed."; camellia_decrypt(&ctx,scratch,scratch); if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) return "CAMELLIA-128 test decryption failed."; camellia_setkey(&ctx,key_192,sizeof(key_192),&bulk_ops); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0) return "CAMELLIA-192 test encryption failed."; camellia_decrypt(&ctx,scratch,scratch); if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) return "CAMELLIA-192 test decryption failed."; camellia_setkey(&ctx,key_256,sizeof(key_256),&bulk_ops); camellia_encrypt(&ctx,scratch,plaintext); if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0) return "CAMELLIA-256 test encryption failed."; camellia_decrypt(&ctx,scratch,scratch); if(memcmp(scratch,plaintext,sizeof(plaintext))!=0) return "CAMELLIA-256 test decryption failed."; if ( (r = selftest_ctr_128 ()) ) return r; if ( (r = selftest_cbc_128 ()) ) return r; if ( (r = selftest_cfb_128 ()) ) return r; return NULL; } /* These oids are from , retrieved May 1, 2007. */ static const gcry_cipher_oid_spec_t camellia128_oids[] = { {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC}, {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB}, {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB}, {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB}, { NULL } }; static const gcry_cipher_oid_spec_t camellia192_oids[] = { {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC}, {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB}, {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB}, {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB}, { NULL } }; static const gcry_cipher_oid_spec_t camellia256_oids[] = { {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC}, {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB}, {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB}, {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB}, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_camellia128 = { GCRY_CIPHER_CAMELLIA128, {0, 0}, "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128, sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt }; gcry_cipher_spec_t _gcry_cipher_spec_camellia192 = { GCRY_CIPHER_CAMELLIA192, {0, 0}, "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192, sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt }; gcry_cipher_spec_t _gcry_cipher_spec_camellia256 = { GCRY_CIPHER_CAMELLIA256, {0, 0}, "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256, sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt };