diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index b7481255..8c897d7b 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -1,509 +1,509 @@ /* cipher-internal.h - Internal defs for cipher.c * Copyright (C) 2011 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef G10_CIPHER_INTERNAL_H #define G10_CIPHER_INTERNAL_H #include "./poly1305-internal.h" /* The maximum supported size of a block in bytes. */ #define MAX_BLOCKSIZE 16 /* The length for an OCB block. Although OCB supports any block length it does not make sense to use a 64 bit blocklen (and cipher) because this reduces the security margin to an unacceptable state. Thus we require a cipher with 128 bit blocklength. */ #define OCB_BLOCK_LEN (128/8) /* The size of the pre-computed L table for OCB. This takes the same size as the table used for GCM and thus we don't save anything by not using such a table. */ #define OCB_L_TABLE_SIZE 16 /* Check the above constants. */ #if OCB_BLOCK_LEN > MAX_BLOCKSIZE # error OCB_BLOCKLEN > MAX_BLOCKSIZE #endif /* Magic values for the context structure. */ #define CTX_MAGIC_NORMAL 0x24091964 #define CTX_MAGIC_SECURE 0x46919042 /* Try to use 16 byte aligned cipher context for better performance. We use the aligned attribute, thus it is only possible to implement this with gcc. */ #undef NEED_16BYTE_ALIGNED_CONTEXT #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED # define NEED_16BYTE_ALIGNED_CONTEXT 1 #endif /* Undef this symbol to trade GCM speed for 256 bytes of memory per context */ #define GCM_USE_TABLES 1 /* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL code. */ #undef GCM_USE_INTEL_PCLMUL #if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES) # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) # if __GNUC__ >= 4 # define GCM_USE_INTEL_PCLMUL 1 # endif # endif #endif /* GCM_USE_INTEL_PCLMUL */ /* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. */ #undef GCM_USE_ARM_PMULL #if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES) # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) # define GCM_USE_ARM_PMULL 1 # elif defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) # define GCM_USE_ARM_PMULL 1 # endif #endif /* GCM_USE_ARM_PMULL */ typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result, const byte *buf, size_t nblocks); /* A VIA processor with the Padlock engine as well as the Intel AES_NI instructions require an alignment of most data on a 16 byte boundary. Because we trick out the compiler while allocating the context, the align attribute as used in rijndael.c does not work on its own. Thus we need to make sure that the entire context structure is a aligned on that boundary. We achieve this by defining a new type and use that instead of our usual alignment type. */ typedef union { PROPERLY_ALIGNED_TYPE foo; #ifdef NEED_16BYTE_ALIGNED_CONTEXT char bar[16] __attribute__ ((aligned (16))); #endif char c[1]; } cipher_context_alignment_t; /* The handle structure. */ struct gcry_cipher_handle { int magic; size_t actual_handle_size; /* Allocated size of this handle. */ size_t handle_offset; /* Offset to the malloced block. */ gcry_cipher_spec_t *spec; /* The algorithm id. This is a hack required because the module interface does not easily allow to retrieve this value. */ int algo; /* A structure with function pointers for bulk operations. Due to limitations of the module system (we don't want to change the API) we need to keep these function pointers here. The cipher open function initializes them and the actual encryption routines use them if they are not NULL. */ struct { void (*cfb_enc)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void (*cfb_dec)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void (*cbc_enc)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); void (*cbc_dec)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); - void (*xts_crypt)(gcry_cipher_hd_t c, unsigned char *tweak, + void (*xts_crypt)(void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); } bulk; int mode; unsigned int flags; struct { unsigned int key:1; /* Set to 1 if a key has been set. */ unsigned int iv:1; /* Set to 1 if a IV has been set. */ unsigned int tag:1; /* Set to 1 if a tag is finalized. */ unsigned int finalize:1; /* Next encrypt/decrypt has the final data. */ } marks; /* The initialization vector. For best performance we make sure that it is properly aligned. In particular some implementations of bulk operations expect an 16 byte aligned IV. IV is also used to store CBC-MAC in CCM mode; counter IV is stored in U_CTR. For OCB mode it is used for the offset value. */ union { cipher_context_alignment_t iv_align; unsigned char iv[MAX_BLOCKSIZE]; } u_iv; /* The counter for CTR mode. This field is also used by AESWRAP and thus we can't use the U_IV union. For OCB mode it is used for the checksum. */ union { cipher_context_alignment_t iv_align; unsigned char ctr[MAX_BLOCKSIZE]; } u_ctr; /* Space to save an IV or CTR for chaining operations. */ unsigned char lastiv[MAX_BLOCKSIZE]; int unused; /* Number of unused bytes in LASTIV. */ union { /* Mode specific storage for CCM mode. */ struct { u64 encryptlen; u64 aadlen; unsigned int authlen; /* Space to save partial input lengths for MAC. */ unsigned char macbuf[GCRY_CCM_BLOCK_LEN]; int mac_unused; /* Number of unprocessed bytes in MACBUF. */ unsigned char s0[GCRY_CCM_BLOCK_LEN]; unsigned int nonce:1;/* Set to 1 if nonce has been set. */ unsigned int lengths:1; /* Set to 1 if CCM length parameters has been processed. */ } ccm; /* Mode specific storage for Poly1305 mode. */ struct { /* byte counter for AAD. */ u32 aadcount[2]; /* byte counter for data. */ u32 datacount[2]; unsigned int aad_finalized:1; unsigned int bytecount_over_limits:1; poly1305_context_t ctx; } poly1305; /* Mode specific storage for CMAC mode. */ struct { unsigned int tag:1; /* Set to 1 if tag has been finalized. */ /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */ unsigned char subkeys[2][MAX_BLOCKSIZE]; } cmac; /* Mode specific storage for GCM mode. */ struct { /* The interim tag for GCM mode. */ union { cipher_context_alignment_t iv_align; unsigned char tag[MAX_BLOCKSIZE]; } u_tag; /* Space to save partial input lengths for MAC. */ unsigned char macbuf[GCRY_CCM_BLOCK_LEN]; int mac_unused; /* Number of unprocessed bytes in MACBUF. */ /* byte counters for GCM */ u32 aadlen[2]; u32 datalen[2]; /* encrypted tag counter */ unsigned char tagiv[MAX_BLOCKSIZE]; unsigned int ghash_data_finalized:1; unsigned int ghash_aad_finalized:1; unsigned int datalen_over_limits:1; unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1; /* --- Following members are not cleared in gcry_cipher_reset --- */ /* GHASH multiplier from key. */ union { cipher_context_alignment_t iv_align; unsigned char key[MAX_BLOCKSIZE]; } u_ghash_key; /* GHASH implementation in use. */ ghash_fn_t ghash_fn; /* Pre-calculated table for GCM. */ #ifdef GCM_USE_TABLES #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__)) #define GCM_TABLES_USE_U64 1 u64 gcm_table[2 * 16]; #else #undef GCM_TABLES_USE_U64 u32 gcm_table[4 * 16]; #endif #endif } gcm; /* Mode specific storage for OCB mode. */ struct { /* Helper variables and pre-computed table of L values. */ unsigned char L_star[OCB_BLOCK_LEN]; unsigned char L_dollar[OCB_BLOCK_LEN]; unsigned char L[OCB_BLOCK_LEN][OCB_L_TABLE_SIZE]; /* The tag is valid if marks.tag has been set. */ unsigned char tag[OCB_BLOCK_LEN]; /* A buffer to hold the offset for the AAD processing. */ unsigned char aad_offset[OCB_BLOCK_LEN]; /* A buffer to hold the current sum of AAD processing. We can't use tag here because tag may already hold the preprocessed checksum of the data. */ unsigned char aad_sum[OCB_BLOCK_LEN]; /* A buffer to store AAD data not yet processed. */ unsigned char aad_leftover[OCB_BLOCK_LEN]; /* Number of data/aad blocks processed so far. */ u64 data_nblocks; u64 aad_nblocks; /* Number of valid bytes in AAD_LEFTOVER. */ unsigned char aad_nleftover; /* Length of the tag. Fixed for now but may eventually be specified using a set of gcry_cipher_flags. */ unsigned char taglen; /* Flags indicating that the final data/aad block has been processed. */ unsigned int data_finalized:1; unsigned int aad_finalized:1; } ocb; /* Mode specific storage for XTS mode. */ struct { /* Pointer to tweak cipher context, allocated after actual * cipher context. */ char *tweak_context; } xts; } u_mode; /* What follows are two contexts of the cipher in use. The first one needs to be aligned well enough for the cipher operation whereas the second one is a copy created by cipher_setkey and used by cipher_reset. That second copy has no need for proper aligment because it is only accessed by memcpy. */ cipher_context_alignment_t context; }; /*-- cipher-cbc.c --*/ gcry_err_code_t _gcry_cipher_cbc_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_cbc_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); /*-- cipher-cfb.c --*/ gcry_err_code_t _gcry_cipher_cfb_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_cfb_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_cfb8_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_cfb8_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); /*-- cipher-ofb.c --*/ gcry_err_code_t _gcry_cipher_ofb_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); /*-- cipher-ctr.c --*/ gcry_err_code_t _gcry_cipher_ctr_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); /*-- cipher-aeswrap.c --*/ gcry_err_code_t _gcry_cipher_aeswrap_encrypt /* */ (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_aeswrap_decrypt /* */ (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen); /*-- cipher-ccm.c --*/ gcry_err_code_t _gcry_cipher_ccm_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_ccm_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_ccm_set_nonce /* */ (gcry_cipher_hd_t c, const unsigned char *nonce, size_t noncelen); gcry_err_code_t _gcry_cipher_ccm_authenticate /* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); gcry_err_code_t _gcry_cipher_ccm_set_lengths /* */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen); gcry_err_code_t _gcry_cipher_ccm_get_tag /* */ (gcry_cipher_hd_t c, unsigned char *outtag, size_t taglen); gcry_err_code_t _gcry_cipher_ccm_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); /*-- cipher-gcm.c --*/ gcry_err_code_t _gcry_cipher_gcm_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_gcm_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_gcm_setiv /* */ (gcry_cipher_hd_t c, const unsigned char *iv, size_t ivlen); gcry_err_code_t _gcry_cipher_gcm_authenticate /* */ (gcry_cipher_hd_t c, const unsigned char *aadbuf, size_t aadbuflen); gcry_err_code_t _gcry_cipher_gcm_get_tag /* */ (gcry_cipher_hd_t c, unsigned char *outtag, size_t taglen); gcry_err_code_t _gcry_cipher_gcm_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); void _gcry_cipher_gcm_setkey /* */ (gcry_cipher_hd_t c); /*-- cipher-poly1305.c --*/ gcry_err_code_t _gcry_cipher_poly1305_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_poly1305_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_poly1305_setiv /* */ (gcry_cipher_hd_t c, const unsigned char *iv, size_t ivlen); gcry_err_code_t _gcry_cipher_poly1305_authenticate /* */ (gcry_cipher_hd_t c, const unsigned char *aadbuf, size_t aadbuflen); gcry_err_code_t _gcry_cipher_poly1305_get_tag /* */ (gcry_cipher_hd_t c, unsigned char *outtag, size_t taglen); gcry_err_code_t _gcry_cipher_poly1305_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); void _gcry_cipher_poly1305_setkey /* */ (gcry_cipher_hd_t c); /*-- cipher-ocb.c --*/ gcry_err_code_t _gcry_cipher_ocb_encrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_ocb_decrypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen); gcry_err_code_t _gcry_cipher_ocb_set_nonce /* */ (gcry_cipher_hd_t c, const unsigned char *nonce, size_t noncelen); gcry_err_code_t _gcry_cipher_ocb_authenticate /* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); gcry_err_code_t _gcry_cipher_ocb_get_tag /* */ (gcry_cipher_hd_t c, unsigned char *outtag, size_t taglen); gcry_err_code_t _gcry_cipher_ocb_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); /*-- cipher-xts.c --*/ gcry_err_code_t _gcry_cipher_xts_crypt /* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen, int encrypt); /* Return the L-value for block N. Note: 'cipher_ocb.c' ensures that N * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can * be directly passed to _gcry_ctz() function and resulting index will * never overflow the table. */ static inline const unsigned char * ocb_get_l (gcry_cipher_hd_t c, u64 n) { unsigned long ntz; #if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4) /* Assumes that N != 0. */ asm ("rep;bsfl %k[low], %k[ntz]\n\t" : [ntz] "=r" (ntz) : [low] "r" ((unsigned long)n) : "cc"); #else ntz = _gcry_ctz (n); #endif return c->u_mode.ocb.L[ntz]; } #endif /*G10_CIPHER_INTERNAL_H*/ diff --git a/cipher/cipher-xts.c b/cipher/cipher-xts.c index 4da89e55..06cefbe0 100644 --- a/cipher/cipher-xts.c +++ b/cipher/cipher-xts.c @@ -1,170 +1,171 @@ /* cipher-xts.c - XTS mode implementation * Copyright (C) 2017 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #include #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "./cipher-internal.h" static inline void xts_gfmul_byA (unsigned char *out, const unsigned char *in) { u64 hi = buf_get_le64 (in + 8); u64 lo = buf_get_le64 (in + 0); u64 carry = -(hi >> 63) & 0x87; hi = (hi << 1) + (lo >> 63); lo = (lo << 1) ^ carry; buf_put_le64 (out + 8, hi); buf_put_le64 (out + 0, lo); } static inline void xts_inc128 (unsigned char *seqno) { u64 lo = buf_get_le64 (seqno + 0); u64 hi = buf_get_le64 (seqno + 8); hi += !(++lo); buf_put_le64 (seqno + 0, lo); buf_put_le64 (seqno + 8, hi); } gcry_err_code_t _gcry_cipher_xts_crypt (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen, int encrypt) { gcry_cipher_encrypt_t tweak_fn = c->spec->encrypt; gcry_cipher_encrypt_t crypt_fn = encrypt ? c->spec->encrypt : c->spec->decrypt; union { cipher_context_alignment_t xcx; byte x1[GCRY_XTS_BLOCK_LEN]; u64 x64[GCRY_XTS_BLOCK_LEN / sizeof(u64)]; } tmp; unsigned int burn, nburn; size_t nblocks; if (c->spec->blocksize != GCRY_XTS_BLOCK_LEN) return GPG_ERR_CIPHER_ALGO; if (outbuflen < inbuflen) return GPG_ERR_BUFFER_TOO_SHORT; if (inbuflen < GCRY_XTS_BLOCK_LEN) return GPG_ERR_BUFFER_TOO_SHORT; /* Data-unit max length: 2^20 blocks. */ if (inbuflen > GCRY_XTS_BLOCK_LEN << 20) return GPG_ERR_INV_LENGTH; nblocks = inbuflen / GCRY_XTS_BLOCK_LEN; nblocks -= !encrypt && (inbuflen % GCRY_XTS_BLOCK_LEN) != 0; /* Generate first tweak value. */ burn = tweak_fn (c->u_mode.xts.tweak_context, c->u_ctr.ctr, c->u_iv.iv); /* Use a bulk method if available. */ if (nblocks && c->bulk.xts_crypt) { - c->bulk.xts_crypt (c, c->u_ctr.ctr, outbuf, inbuf, nblocks, encrypt); + c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks, + encrypt); inbuf += nblocks * GCRY_XTS_BLOCK_LEN; outbuf += nblocks * GCRY_XTS_BLOCK_LEN; inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN; nblocks = 0; } /* If we don't have a bulk method use the standard method. We also use this method for the a remaining partial block. */ while (nblocks) { /* Xor-Encrypt/Decrypt-Xor block. */ buf_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); burn = nburn > burn ? nburn : burn; buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); outbuf += GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; inbuflen -= GCRY_XTS_BLOCK_LEN; nblocks--; /* Generate next tweak. */ xts_gfmul_byA (c->u_ctr.ctr, c->u_ctr.ctr); } /* Handle remaining data with ciphertext stealing. */ if (inbuflen) { if (!encrypt) { gcry_assert (inbuflen > GCRY_XTS_BLOCK_LEN); gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN * 2); /* Generate last tweak. */ xts_gfmul_byA (tmp.x1, c->u_ctr.ctr); /* Decrypt last block first. */ buf_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, outbuf, outbuf); burn = nburn > burn ? nburn : burn; buf_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); inbuflen -= GCRY_XTS_BLOCK_LEN; inbuf += GCRY_XTS_BLOCK_LEN; outbuf += GCRY_XTS_BLOCK_LEN; } gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN); outbuf -= GCRY_XTS_BLOCK_LEN; /* Steal ciphertext from previous block. */ buf_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN); buf_cpy (tmp.x64, inbuf, inbuflen); buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen); /* Decrypt/Encrypt last block. */ buf_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); burn = nburn > burn ? nburn : burn; buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); } /* Auto-increment data-unit sequence number */ xts_inc128 (c->u_iv.iv); wipememory (&tmp, sizeof(tmp)); wipememory (c->u_ctr.ctr, sizeof(c->u_ctr.ctr)); if (burn > 0) _gcry_burn_stack (burn + 4 * sizeof(void *)); return 0; } diff --git a/cipher/cipher.c b/cipher/cipher.c index 98127386..063c13da 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -1,1680 +1,1681 @@ /* cipher.c - cipher dispatcher * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. * Copyright (C) 2013 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #include #include "g10lib.h" #include "../src/gcrypt-testapi.h" #include "cipher.h" #include "./cipher-internal.h" /* This is the list of the default ciphers, which are included in libgcrypt. */ static gcry_cipher_spec_t *cipher_list[] = { #if USE_BLOWFISH &_gcry_cipher_spec_blowfish, #endif #if USE_DES &_gcry_cipher_spec_des, &_gcry_cipher_spec_tripledes, #endif #if USE_ARCFOUR &_gcry_cipher_spec_arcfour, #endif #if USE_CAST5 &_gcry_cipher_spec_cast5, #endif #if USE_AES &_gcry_cipher_spec_aes, &_gcry_cipher_spec_aes192, &_gcry_cipher_spec_aes256, #endif #if USE_TWOFISH &_gcry_cipher_spec_twofish, &_gcry_cipher_spec_twofish128, #endif #if USE_SERPENT &_gcry_cipher_spec_serpent128, &_gcry_cipher_spec_serpent192, &_gcry_cipher_spec_serpent256, #endif #if USE_RFC2268 &_gcry_cipher_spec_rfc2268_40, &_gcry_cipher_spec_rfc2268_128, #endif #if USE_SEED &_gcry_cipher_spec_seed, #endif #if USE_CAMELLIA &_gcry_cipher_spec_camellia128, &_gcry_cipher_spec_camellia192, &_gcry_cipher_spec_camellia256, #endif #ifdef USE_IDEA &_gcry_cipher_spec_idea, #endif #if USE_SALSA20 &_gcry_cipher_spec_salsa20, &_gcry_cipher_spec_salsa20r12, #endif #if USE_GOST28147 &_gcry_cipher_spec_gost28147, #endif #if USE_CHACHA20 &_gcry_cipher_spec_chacha20, #endif NULL }; static int map_algo (int algo) { return algo; } /* Return the spec structure for the cipher algorithm ALGO. For an unknown algorithm NULL is returned. */ static gcry_cipher_spec_t * spec_from_algo (int algo) { int idx; gcry_cipher_spec_t *spec; algo = map_algo (algo); for (idx = 0; (spec = cipher_list[idx]); idx++) if (algo == spec->algo) return spec; return NULL; } /* Lookup a cipher's spec by its name. */ static gcry_cipher_spec_t * spec_from_name (const char *name) { gcry_cipher_spec_t *spec; int idx; const char **aliases; for (idx=0; (spec = cipher_list[idx]); idx++) { if (!stricmp (name, spec->name)) return spec; if (spec->aliases) { for (aliases = spec->aliases; *aliases; aliases++) if (!stricmp (name, *aliases)) return spec; } } return NULL; } /* Lookup a cipher's spec by its OID. */ static gcry_cipher_spec_t * spec_from_oid (const char *oid) { gcry_cipher_spec_t *spec; gcry_cipher_oid_spec_t *oid_specs; int idx, j; for (idx=0; (spec = cipher_list[idx]); idx++) { oid_specs = spec->oids; if (oid_specs) { for (j = 0; oid_specs[j].oid; j++) if (!stricmp (oid, oid_specs[j].oid)) return spec; } } return NULL; } /* Locate the OID in the oid table and return the spec or NULL if not found. An optional "oid." or "OID." prefix in OID is ignored, the OID is expected to be in standard IETF dotted notation. A pointer to the OID specification of the module implementing this algorithm is return in OID_SPEC unless passed as NULL.*/ static gcry_cipher_spec_t * search_oid (const char *oid, gcry_cipher_oid_spec_t *oid_spec) { gcry_cipher_spec_t *spec; int i; if (!oid) return NULL; if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4)) oid += 4; spec = spec_from_oid (oid); if (spec && spec->oids) { for (i = 0; spec->oids[i].oid; i++) if (!stricmp (oid, spec->oids[i].oid)) { if (oid_spec) *oid_spec = spec->oids[i]; return spec; } } return NULL; } /* Map STRING to the cipher algorithm identifier. Returns the algorithm ID of the cipher for the given name or 0 if the name is not known. It is valid to pass NULL for STRING which results in a return value of 0. */ int _gcry_cipher_map_name (const char *string) { gcry_cipher_spec_t *spec; if (!string) return 0; /* If the string starts with a digit (optionally prefixed with either "OID." or "oid."), we first look into our table of ASN.1 object identifiers to figure out the algorithm */ spec = search_oid (string, NULL); if (spec) return spec->algo; spec = spec_from_name (string); if (spec) return spec->algo; return 0; } /* Given a STRING with an OID in dotted decimal notation, this function returns the cipher mode (GCRY_CIPHER_MODE_*) associated with that OID or 0 if no mode is known. Passing NULL for string yields a return value of 0. */ int _gcry_cipher_mode_from_oid (const char *string) { gcry_cipher_spec_t *spec; gcry_cipher_oid_spec_t oid_spec; if (!string) return 0; spec = search_oid (string, &oid_spec); if (spec) return oid_spec.mode; return 0; } /* Map the cipher algorithm identifier ALGORITHM to a string representing this algorithm. This string is the default name as used by Libgcrypt. A "?" is returned for an unknown algorithm. NULL is never returned. */ const char * _gcry_cipher_algo_name (int algorithm) { gcry_cipher_spec_t *spec; spec = spec_from_algo (algorithm); return spec? spec->name : "?"; } /* Flag the cipher algorithm with the identifier ALGORITHM as disabled. There is no error return, the function does nothing for unknown algorithms. Disabled algorithms are virtually not available in Libgcrypt. This is not thread safe and should thus be called early. */ static void disable_cipher_algo (int algo) { gcry_cipher_spec_t *spec = spec_from_algo (algo); if (spec) spec->flags.disabled = 1; } /* Return 0 if the cipher algorithm with identifier ALGORITHM is available. Returns a basic error code value if it is not available. */ static gcry_err_code_t check_cipher_algo (int algorithm) { gcry_cipher_spec_t *spec; spec = spec_from_algo (algorithm); if (spec && !spec->flags.disabled) return 0; return GPG_ERR_CIPHER_ALGO; } /* Return the standard length in bits of the key for the cipher algorithm with the identifier ALGORITHM. */ static unsigned int cipher_get_keylen (int algorithm) { gcry_cipher_spec_t *spec; unsigned len = 0; spec = spec_from_algo (algorithm); if (spec) { len = spec->keylen; if (!len) log_bug ("cipher %d w/o key length\n", algorithm); } return len; } /* Return the block length of the cipher algorithm with the identifier ALGORITHM. This function return 0 for an invalid algorithm. */ static unsigned int cipher_get_blocksize (int algorithm) { gcry_cipher_spec_t *spec; unsigned len = 0; spec = spec_from_algo (algorithm); if (spec) { len = spec->blocksize; if (!len) log_bug ("cipher %d w/o blocksize\n", algorithm); } return len; } /* Open a cipher handle for use with cipher algorithm ALGORITHM, using the cipher mode MODE (one of the GCRY_CIPHER_MODE_*) and return a handle in HANDLE. Put NULL into HANDLE and return an error code if something goes wrong. FLAGS may be used to modify the operation. The defined flags are: GCRY_CIPHER_SECURE: allocate all internal buffers in secure memory. GCRY_CIPHER_ENABLE_SYNC: Enable the sync operation as used in OpenPGP. GCRY_CIPHER_CBC_CTS: Enable CTS mode. GCRY_CIPHER_CBC_MAC: Enable MAC mode. Values for these flags may be combined using OR. */ gcry_err_code_t _gcry_cipher_open (gcry_cipher_hd_t *handle, int algo, int mode, unsigned int flags) { gcry_err_code_t rc; gcry_cipher_hd_t h = NULL; if (mode >= GCRY_CIPHER_MODE_INTERNAL) rc = GPG_ERR_INV_CIPHER_MODE; else rc = _gcry_cipher_open_internal (&h, algo, mode, flags); *handle = rc ? NULL : h; return rc; } gcry_err_code_t _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, int algo, int mode, unsigned int flags) { int secure = (flags & GCRY_CIPHER_SECURE); gcry_cipher_spec_t *spec; gcry_cipher_hd_t h = NULL; gcry_err_code_t err; /* If the application missed to call the random poll function, we do it here to ensure that it is used once in a while. */ _gcry_fast_random_poll (); spec = spec_from_algo (algo); if (!spec) err = GPG_ERR_CIPHER_ALGO; else if (spec->flags.disabled) err = GPG_ERR_CIPHER_ALGO; else err = 0; /* check flags */ if ((! err) && ((flags & ~(0 | GCRY_CIPHER_SECURE | GCRY_CIPHER_ENABLE_SYNC | GCRY_CIPHER_CBC_CTS | GCRY_CIPHER_CBC_MAC)) || (flags & GCRY_CIPHER_CBC_CTS & GCRY_CIPHER_CBC_MAC))) err = GPG_ERR_CIPHER_ALGO; /* check that a valid mode has been requested */ if (! err) switch (mode) { case GCRY_CIPHER_MODE_CCM: if (spec->blocksize != GCRY_CCM_BLOCK_LEN) err = GPG_ERR_INV_CIPHER_MODE; if (!spec->encrypt || !spec->decrypt) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_XTS: if (spec->blocksize != GCRY_XTS_BLOCK_LEN) err = GPG_ERR_INV_CIPHER_MODE; if (!spec->encrypt || !spec->decrypt) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_ECB: case GCRY_CIPHER_MODE_CBC: case GCRY_CIPHER_MODE_CFB: case GCRY_CIPHER_MODE_CFB8: case GCRY_CIPHER_MODE_OFB: case GCRY_CIPHER_MODE_CTR: case GCRY_CIPHER_MODE_AESWRAP: case GCRY_CIPHER_MODE_CMAC: case GCRY_CIPHER_MODE_GCM: if (!spec->encrypt || !spec->decrypt) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_POLY1305: if (!spec->stencrypt || !spec->stdecrypt || !spec->setiv) err = GPG_ERR_INV_CIPHER_MODE; else if (spec->algo != GCRY_CIPHER_CHACHA20) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_OCB: /* Note that our implementation allows only for 128 bit block length algorithms. Lower block lengths would be possible but we do not implement them because they limit the security too much. */ if (!spec->encrypt || !spec->decrypt) err = GPG_ERR_INV_CIPHER_MODE; else if (spec->blocksize != (128/8)) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_STREAM: if (!spec->stencrypt || !spec->stdecrypt) err = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_NONE: /* This mode may be used for debugging. It copies the main text verbatim to the ciphertext. We do not allow this in fips mode or if no debug flag has been set. */ if (fips_mode () || !_gcry_get_debug_flag (0)) err = GPG_ERR_INV_CIPHER_MODE; break; default: err = GPG_ERR_INV_CIPHER_MODE; } /* Perform selftest here and mark this with a flag in cipher_table? No, we should not do this as it takes too long. Further it does not make sense to exclude algorithms with failing selftests at runtime: If a selftest fails there is something seriously wrong with the system and thus we better die immediately. */ if (! err) { size_t size = (sizeof (*h) + 2 * spec->contextsize - sizeof (cipher_context_alignment_t) #ifdef NEED_16BYTE_ALIGNED_CONTEXT + 15 /* Space for leading alignment gap. */ #endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ ); /* Space needed per mode. */ switch (mode) { case GCRY_CIPHER_MODE_XTS: /* Additional cipher context for tweak. */ size += 2 * spec->contextsize + 15; break; default: break; } if (secure) h = xtrycalloc_secure (1, size); else h = xtrycalloc (1, size); if (! h) err = gpg_err_code_from_syserror (); else { size_t off = 0; char *tc; #ifdef NEED_16BYTE_ALIGNED_CONTEXT if ( ((uintptr_t)h & 0x0f) ) { /* The malloced block is not aligned on a 16 byte boundary. Correct for this. */ off = 16 - ((uintptr_t)h & 0x0f); h = (void*)((char*)h + off); } #endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; h->actual_handle_size = size - off; h->handle_offset = off; h->spec = spec; h->algo = algo; h->mode = mode; h->flags = flags; /* Setup bulk encryption routines. */ switch (algo) { #ifdef USE_AES case GCRY_CIPHER_AES128: case GCRY_CIPHER_AES192: case GCRY_CIPHER_AES256: h->bulk.cfb_enc = _gcry_aes_cfb_enc; h->bulk.cfb_dec = _gcry_aes_cfb_dec; h->bulk.cbc_enc = _gcry_aes_cbc_enc; h->bulk.cbc_dec = _gcry_aes_cbc_dec; h->bulk.ctr_enc = _gcry_aes_ctr_enc; h->bulk.ocb_crypt = _gcry_aes_ocb_crypt; h->bulk.ocb_auth = _gcry_aes_ocb_auth; + h->bulk.xts_crypt = _gcry_aes_xts_crypt; break; #endif /*USE_AES*/ #ifdef USE_BLOWFISH case GCRY_CIPHER_BLOWFISH: h->bulk.cfb_dec = _gcry_blowfish_cfb_dec; h->bulk.cbc_dec = _gcry_blowfish_cbc_dec; h->bulk.ctr_enc = _gcry_blowfish_ctr_enc; break; #endif /*USE_BLOWFISH*/ #ifdef USE_CAST5 case GCRY_CIPHER_CAST5: h->bulk.cfb_dec = _gcry_cast5_cfb_dec; h->bulk.cbc_dec = _gcry_cast5_cbc_dec; h->bulk.ctr_enc = _gcry_cast5_ctr_enc; break; #endif /*USE_CAMELLIA*/ #ifdef USE_CAMELLIA case GCRY_CIPHER_CAMELLIA128: case GCRY_CIPHER_CAMELLIA192: case GCRY_CIPHER_CAMELLIA256: h->bulk.cbc_dec = _gcry_camellia_cbc_dec; h->bulk.cfb_dec = _gcry_camellia_cfb_dec; h->bulk.ctr_enc = _gcry_camellia_ctr_enc; h->bulk.ocb_crypt = _gcry_camellia_ocb_crypt; h->bulk.ocb_auth = _gcry_camellia_ocb_auth; break; #endif /*USE_CAMELLIA*/ #ifdef USE_DES case GCRY_CIPHER_3DES: h->bulk.cbc_dec = _gcry_3des_cbc_dec; h->bulk.cfb_dec = _gcry_3des_cfb_dec; h->bulk.ctr_enc = _gcry_3des_ctr_enc; break; #endif /*USE_DES*/ #ifdef USE_SERPENT case GCRY_CIPHER_SERPENT128: case GCRY_CIPHER_SERPENT192: case GCRY_CIPHER_SERPENT256: h->bulk.cbc_dec = _gcry_serpent_cbc_dec; h->bulk.cfb_dec = _gcry_serpent_cfb_dec; h->bulk.ctr_enc = _gcry_serpent_ctr_enc; h->bulk.ocb_crypt = _gcry_serpent_ocb_crypt; h->bulk.ocb_auth = _gcry_serpent_ocb_auth; break; #endif /*USE_SERPENT*/ #ifdef USE_TWOFISH case GCRY_CIPHER_TWOFISH: case GCRY_CIPHER_TWOFISH128: h->bulk.cbc_dec = _gcry_twofish_cbc_dec; h->bulk.cfb_dec = _gcry_twofish_cfb_dec; h->bulk.ctr_enc = _gcry_twofish_ctr_enc; h->bulk.ocb_crypt = _gcry_twofish_ocb_crypt; h->bulk.ocb_auth = _gcry_twofish_ocb_auth; break; #endif /*USE_TWOFISH*/ default: break; } /* Setup defaults depending on the mode. */ switch (mode) { case GCRY_CIPHER_MODE_OCB: h->u_mode.ocb.taglen = 16; /* Bytes. */ break; case GCRY_CIPHER_MODE_XTS: tc = h->context.c + spec->contextsize * 2; tc += (16 - (uintptr_t)tc % 16) % 16; h->u_mode.xts.tweak_context = tc; break; default: break; } } } /* Done. */ *handle = err ? NULL : h; return err; } /* Release all resources associated with the cipher handle H. H may be NULL in which case this is a no-operation. */ void _gcry_cipher_close (gcry_cipher_hd_t h) { size_t off; if (!h) return; if ((h->magic != CTX_MAGIC_SECURE) && (h->magic != CTX_MAGIC_NORMAL)) _gcry_fatal_error(GPG_ERR_INTERNAL, "gcry_cipher_close: already closed/invalid handle"); else h->magic = 0; /* We always want to wipe out the memory even when the context has been allocated in secure memory. The user might have disabled secure memory or is using his own implementation which does not do the wiping. To accomplish this we need to keep track of the actual size of this structure because we have no way to known how large the allocated area was when using a standard malloc. */ off = h->handle_offset; wipememory (h, h->actual_handle_size); xfree ((char*)h - off); } /* Set the key to be used for the encryption context C to KEY with length KEYLEN. The length should match the required length. */ static gcry_err_code_t cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen) { gcry_err_code_t rc; if (c->mode == GCRY_CIPHER_MODE_XTS) { /* XTS uses two keys. */ if (keylen % 2) return GPG_ERR_INV_KEYLEN; keylen /= 2; if (fips_mode ()) { /* Reject key if subkeys Key_1 and Key_2 are equal. See "Implementation Guidance for FIPS 140-2, A.9 XTS-AES Key Generation Requirements" for details. */ if (buf_eq_const (key, key + keylen, keylen)) return GPG_ERR_WEAK_KEY; } } rc = c->spec->setkey (&c->context.c, key, keylen); if (!rc) { /* Duplicate initial context. */ memcpy ((void *) ((char *) &c->context.c + c->spec->contextsize), (void *) &c->context.c, c->spec->contextsize); c->marks.key = 1; switch (c->mode) { case GCRY_CIPHER_MODE_CMAC: _gcry_cipher_cmac_set_subkeys (c); break; case GCRY_CIPHER_MODE_GCM: _gcry_cipher_gcm_setkey (c); break; case GCRY_CIPHER_MODE_POLY1305: _gcry_cipher_poly1305_setkey (c); break; case GCRY_CIPHER_MODE_XTS: /* Setup tweak cipher with second part of XTS key. */ rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen, keylen); if (!rc) { /* Duplicate initial tweak context. */ memcpy (c->u_mode.xts.tweak_context + c->spec->contextsize, c->u_mode.xts.tweak_context, c->spec->contextsize); } else c->marks.key = 0; break; default: break; }; } else c->marks.key = 0; return rc; } /* Set the IV to be used for the encryption context C to IV with length IVLEN. The length should match the required length. */ static gcry_err_code_t cipher_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) { /* If the cipher has its own IV handler, we use only this one. This is currently used for stream ciphers requiring a nonce. */ if (c->spec->setiv) { c->spec->setiv (&c->context.c, iv, ivlen); return 0; } memset (c->u_iv.iv, 0, c->spec->blocksize); if (iv) { if (ivlen != c->spec->blocksize) { log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n", (unsigned int)ivlen, (unsigned int)c->spec->blocksize); fips_signal_error ("IV length does not match blocklength"); } if (ivlen > c->spec->blocksize) ivlen = c->spec->blocksize; memcpy (c->u_iv.iv, iv, ivlen); c->marks.iv = 1; } else c->marks.iv = 0; c->unused = 0; return 0; } /* Reset the cipher context to the initial context. This is basically the same as an release followed by a new. */ static void cipher_reset (gcry_cipher_hd_t c) { unsigned int marks_key; marks_key = c->marks.key; memcpy (&c->context.c, (char *) &c->context.c + c->spec->contextsize, c->spec->contextsize); memset (&c->marks, 0, sizeof c->marks); memset (c->u_iv.iv, 0, c->spec->blocksize); memset (c->lastiv, 0, c->spec->blocksize); memset (c->u_ctr.ctr, 0, c->spec->blocksize); c->unused = 0; c->marks.key = marks_key; switch (c->mode) { case GCRY_CIPHER_MODE_CMAC: /* Only clear 'tag' for cmac, keep subkeys. */ c->u_mode.cmac.tag = 0; break; case GCRY_CIPHER_MODE_GCM: /* Only clear head of u_mode, keep ghash_key and gcm_table. */ { byte *u_mode_pos = (void *)&c->u_mode; byte *ghash_key_pos = c->u_mode.gcm.u_ghash_key.key; size_t u_mode_head_length = ghash_key_pos - u_mode_pos; memset (&c->u_mode, 0, u_mode_head_length); } break; case GCRY_CIPHER_MODE_POLY1305: memset (&c->u_mode.poly1305, 0, sizeof c->u_mode.poly1305); break; case GCRY_CIPHER_MODE_CCM: memset (&c->u_mode.ccm, 0, sizeof c->u_mode.ccm); break; case GCRY_CIPHER_MODE_OCB: memset (&c->u_mode.ocb, 0, sizeof c->u_mode.ocb); /* Setup default taglen. */ c->u_mode.ocb.taglen = 16; break; case GCRY_CIPHER_MODE_XTS: memcpy (c->u_mode.xts.tweak_context, c->u_mode.xts.tweak_context + c->spec->contextsize, c->spec->contextsize); break; default: break; /* u_mode unused by other modes. */ } } static gcry_err_code_t do_ecb_crypt (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen, gcry_cipher_encrypt_t crypt_fn) { unsigned int blocksize = c->spec->blocksize; size_t n, nblocks; unsigned int burn, nburn; if (outbuflen < inbuflen) return GPG_ERR_BUFFER_TOO_SHORT; if ((inbuflen % blocksize)) return GPG_ERR_INV_LENGTH; nblocks = inbuflen / blocksize; burn = 0; for (n=0; n < nblocks; n++ ) { nburn = crypt_fn (&c->context.c, outbuf, inbuf); burn = nburn > burn ? nburn : burn; inbuf += blocksize; outbuf += blocksize; } if (burn > 0) _gcry_burn_stack (burn + 4 * sizeof(void *)); return 0; } static gcry_err_code_t do_ecb_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen) { return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->encrypt); } static gcry_err_code_t do_ecb_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, const unsigned char *inbuf, size_t inbuflen) { return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->decrypt); } /**************** * Encrypt INBUF to OUTBUF with the mode selected at open. * inbuf and outbuf may overlap or be the same. * Depending on the mode some constraints apply to INBUFLEN. */ static gcry_err_code_t cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen) { gcry_err_code_t rc; if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key) { log_error ("cipher_encrypt: key not set\n"); return GPG_ERR_MISSING_KEY; } switch (c->mode) { case GCRY_CIPHER_MODE_ECB: rc = do_ecb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CBC: rc = _gcry_cipher_cbc_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CFB: rc = _gcry_cipher_cfb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CFB8: rc = _gcry_cipher_cfb8_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_OFB: rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CTR: rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_AESWRAP: rc = _gcry_cipher_aeswrap_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CMAC: rc = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_XTS: rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1); break; case GCRY_CIPHER_MODE_STREAM: c->spec->stencrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf, inbuflen); rc = 0; break; case GCRY_CIPHER_MODE_NONE: if (fips_mode () || !_gcry_get_debug_flag (0)) { fips_signal_error ("cipher mode NONE used"); rc = GPG_ERR_INV_CIPHER_MODE; } else { if (inbuf != outbuf) memmove (outbuf, inbuf, inbuflen); rc = 0; } break; default: log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode ); rc = GPG_ERR_INV_CIPHER_MODE; break; } return rc; } /**************** * Encrypt IN and write it to OUT. If IN is NULL, in-place encryption has * been requested. */ gcry_err_code_t _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, const void *in, size_t inlen) { gcry_err_code_t rc; if (!in) /* Caller requested in-place encryption. */ { in = out; inlen = outsize; } rc = cipher_encrypt (h, out, outsize, in, inlen); /* Failsafe: Make sure that the plaintext will never make it into OUT if the encryption returned an error. */ if (rc && out) memset (out, 0x42, outsize); return rc; } /**************** * Decrypt INBUF to OUTBUF with the mode selected at open. * inbuf and outbuf may overlap or be the same. * Depending on the mode some some constraints apply to INBUFLEN. */ static gcry_err_code_t cipher_decrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen) { gcry_err_code_t rc; if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key) { log_error ("cipher_decrypt: key not set\n"); return GPG_ERR_MISSING_KEY; } switch (c->mode) { case GCRY_CIPHER_MODE_ECB: rc = do_ecb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CBC: rc = _gcry_cipher_cbc_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CFB: rc = _gcry_cipher_cfb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CFB8: rc = _gcry_cipher_cfb8_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_OFB: rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CTR: rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_AESWRAP: rc = _gcry_cipher_aeswrap_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_CMAC: rc = GPG_ERR_INV_CIPHER_MODE; break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen); break; case GCRY_CIPHER_MODE_XTS: rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0); break; case GCRY_CIPHER_MODE_STREAM: c->spec->stdecrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf, inbuflen); rc = 0; break; case GCRY_CIPHER_MODE_NONE: if (fips_mode () || !_gcry_get_debug_flag (0)) { fips_signal_error ("cipher mode NONE used"); rc = GPG_ERR_INV_CIPHER_MODE; } else { if (inbuf != outbuf) memmove (outbuf, inbuf, inbuflen); rc = 0; } break; default: log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode ); rc = GPG_ERR_INV_CIPHER_MODE; break; } return rc; } gcry_err_code_t _gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, const void *in, size_t inlen) { if (!in) /* Caller requested in-place encryption. */ { in = out; inlen = outsize; } return cipher_decrypt (h, out, outsize, in, inlen); } /**************** * Used for PGP's somewhat strange CFB mode. Only works if * the corresponding flag is set. */ static void cipher_sync (gcry_cipher_hd_t c) { if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused) { memmove (c->u_iv.iv + c->unused, c->u_iv.iv, c->spec->blocksize - c->unused); memcpy (c->u_iv.iv, c->lastiv + c->spec->blocksize - c->unused, c->unused); c->unused = 0; } } gcry_err_code_t _gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen) { return cipher_setkey (hd, (void*)key, keylen); } gcry_err_code_t _gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen) { gcry_err_code_t rc = 0; switch (hd->mode) { case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_set_nonce (hd, iv, ivlen); break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_setiv (hd, iv, ivlen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_setiv (hd, iv, ivlen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_set_nonce (hd, iv, ivlen); break; default: rc = cipher_setiv (hd, iv, ivlen); break; } return rc; } /* Set counter for CTR mode. (CTR,CTRLEN) must denote a buffer of block size length, or (NULL,0) to set the CTR to the all-zero block. */ gpg_err_code_t _gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen) { if (ctr && ctrlen == hd->spec->blocksize) { memcpy (hd->u_ctr.ctr, ctr, hd->spec->blocksize); hd->unused = 0; } else if (!ctr || !ctrlen) { memset (hd->u_ctr.ctr, 0, hd->spec->blocksize); hd->unused = 0; } else return GPG_ERR_INV_ARG; return 0; } gpg_err_code_t _gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen) { if (ctr && ctrlen == hd->spec->blocksize) memcpy (ctr, hd->u_ctr.ctr, hd->spec->blocksize); else return GPG_ERR_INV_ARG; return 0; } gcry_err_code_t _gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, size_t abuflen) { gcry_err_code_t rc; switch (hd->mode) { case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_authenticate (hd, abuf, abuflen); break; case GCRY_CIPHER_MODE_CMAC: rc = _gcry_cipher_cmac_authenticate (hd, abuf, abuflen); break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_authenticate (hd, abuf, abuflen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_authenticate (hd, abuf, abuflen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_authenticate (hd, abuf, abuflen); break; default: log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode); rc = GPG_ERR_INV_CIPHER_MODE; break; } return rc; } gcry_err_code_t _gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen) { gcry_err_code_t rc; switch (hd->mode) { case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_get_tag (hd, outtag, taglen); break; case GCRY_CIPHER_MODE_CMAC: rc = _gcry_cipher_cmac_get_tag (hd, outtag, taglen); break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_get_tag (hd, outtag, taglen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_get_tag (hd, outtag, taglen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_get_tag (hd, outtag, taglen); break; default: log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode); rc = GPG_ERR_INV_CIPHER_MODE; break; } return rc; } gcry_err_code_t _gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen) { gcry_err_code_t rc; switch (hd->mode) { case GCRY_CIPHER_MODE_CCM: rc = _gcry_cipher_ccm_check_tag (hd, intag, taglen); break; case GCRY_CIPHER_MODE_CMAC: rc = _gcry_cipher_cmac_check_tag (hd, intag, taglen); break; case GCRY_CIPHER_MODE_GCM: rc = _gcry_cipher_gcm_check_tag (hd, intag, taglen); break; case GCRY_CIPHER_MODE_POLY1305: rc = _gcry_cipher_poly1305_check_tag (hd, intag, taglen); break; case GCRY_CIPHER_MODE_OCB: rc = _gcry_cipher_ocb_check_tag (hd, intag, taglen); break; default: log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode); rc = GPG_ERR_INV_CIPHER_MODE; break; } return rc; } gcry_err_code_t _gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen) { gcry_err_code_t rc = 0; switch (cmd) { case GCRYCTL_RESET: cipher_reset (h); break; case GCRYCTL_FINALIZE: if (!h || buffer || buflen) return GPG_ERR_INV_ARG; h->marks.finalize = 1; break; case GCRYCTL_CFB_SYNC: cipher_sync( h ); break; case GCRYCTL_SET_CBC_CTS: if (buflen) if (h->flags & GCRY_CIPHER_CBC_MAC) rc = GPG_ERR_INV_FLAG; else h->flags |= GCRY_CIPHER_CBC_CTS; else h->flags &= ~GCRY_CIPHER_CBC_CTS; break; case GCRYCTL_SET_CBC_MAC: if (buflen) if (h->flags & GCRY_CIPHER_CBC_CTS) rc = GPG_ERR_INV_FLAG; else h->flags |= GCRY_CIPHER_CBC_MAC; else h->flags &= ~GCRY_CIPHER_CBC_MAC; break; case GCRYCTL_SET_CCM_LENGTHS: { u64 params[3]; size_t encryptedlen; size_t aadlen; size_t authtaglen; if (h->mode != GCRY_CIPHER_MODE_CCM) return GPG_ERR_INV_CIPHER_MODE; if (!buffer || buflen != 3 * sizeof(u64)) return GPG_ERR_INV_ARG; /* This command is used to pass additional length parameters needed by CCM mode to initialize CBC-MAC. */ memcpy (params, buffer, sizeof(params)); encryptedlen = params[0]; aadlen = params[1]; authtaglen = params[2]; rc = _gcry_cipher_ccm_set_lengths (h, encryptedlen, aadlen, authtaglen); } break; case GCRYCTL_SET_TAGLEN: if (!h || !buffer || buflen != sizeof(int) ) return GPG_ERR_INV_ARG; switch (h->mode) { case GCRY_CIPHER_MODE_OCB: switch (*(int*)buffer) { case 8: case 12: case 16: h->u_mode.ocb.taglen = *(int*)buffer; break; default: rc = GPG_ERR_INV_LENGTH; /* Invalid tag length. */ break; } break; default: rc =GPG_ERR_INV_CIPHER_MODE; break; } break; case GCRYCTL_DISABLE_ALGO: /* This command expects NULL for H and BUFFER to point to an integer with the algo number. */ if( h || !buffer || buflen != sizeof(int) ) return GPG_ERR_CIPHER_ALGO; disable_cipher_algo( *(int*)buffer ); break; case PRIV_CIPHERCTL_DISABLE_WEAK_KEY: /* (private) */ if (h->spec->set_extra_info) rc = h->spec->set_extra_info (&h->context.c, CIPHER_INFO_NO_WEAK_KEY, NULL, 0); else rc = GPG_ERR_NOT_SUPPORTED; break; case PRIV_CIPHERCTL_GET_INPUT_VECTOR: /* (private) */ /* This is the input block as used in CFB and OFB mode which has initially been set as IV. The returned format is: 1 byte Actual length of the block in bytes. n byte The block. If the provided buffer is too short, an error is returned. */ if (buflen < (1 + h->spec->blocksize)) rc = GPG_ERR_TOO_SHORT; else { unsigned char *ivp; unsigned char *dst = buffer; int n = h->unused; if (!n) n = h->spec->blocksize; gcry_assert (n <= h->spec->blocksize); *dst++ = n; ivp = h->u_iv.iv + h->spec->blocksize - n; while (n--) *dst++ = *ivp++; } break; case GCRYCTL_SET_SBOX: if (h->spec->set_extra_info) rc = h->spec->set_extra_info (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen); else rc = GPG_ERR_NOT_SUPPORTED; break; default: rc = GPG_ERR_INV_OP; } return rc; } /* Return information about the cipher handle H. CMD is the kind of * information requested. * * CMD may be one of: * * GCRYCTL_GET_TAGLEN: * Return the length of the tag for an AE algorithm mode. An * error is returned for modes which do not support a tag. * BUFFER must be given as NULL. On success the result is stored * at NBYTES. The taglen is returned in bytes. * * The function returns 0 on success or an error code. */ gcry_err_code_t _gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes) { gcry_err_code_t rc = 0; switch (cmd) { case GCRYCTL_GET_TAGLEN: if (!h || buffer || !nbytes) rc = GPG_ERR_INV_ARG; else { switch (h->mode) { case GCRY_CIPHER_MODE_OCB: *nbytes = h->u_mode.ocb.taglen; break; case GCRY_CIPHER_MODE_CCM: *nbytes = h->u_mode.ccm.authlen; break; case GCRY_CIPHER_MODE_GCM: *nbytes = GCRY_GCM_BLOCK_LEN; break; case GCRY_CIPHER_MODE_POLY1305: *nbytes = POLY1305_TAGLEN; break; default: rc = GPG_ERR_INV_CIPHER_MODE; break; } } break; default: rc = GPG_ERR_INV_OP; } return rc; } /* Return information about the given cipher algorithm ALGO. WHAT select the kind of information returned: GCRYCTL_GET_KEYLEN: Return the length of the key. If the algorithm ALGO supports multiple key lengths, the maximum supported key length is returned. The key length is returned as number of octets. BUFFER and NBYTES must be zero. GCRYCTL_GET_BLKLEN: Return the blocklength of the algorithm ALGO counted in octets. BUFFER and NBYTES must be zero. GCRYCTL_TEST_ALGO: Returns 0 if the specified algorithm ALGO is available for use. BUFFER and NBYTES must be zero. Note: Because this function is in most cases used to return an integer value, we can make it easier for the caller to just look at the return value. The caller will in all cases consult the value and thereby detecting whether a error occurred or not (i.e. while checking the block size) */ gcry_err_code_t _gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes) { gcry_err_code_t rc = 0; unsigned int ui; switch (what) { case GCRYCTL_GET_KEYLEN: if (buffer || (! nbytes)) rc = GPG_ERR_CIPHER_ALGO; else { ui = cipher_get_keylen (algo); if ((ui > 0) && (ui <= 512)) *nbytes = (size_t) ui / 8; else /* The only reason for an error is an invalid algo. */ rc = GPG_ERR_CIPHER_ALGO; } break; case GCRYCTL_GET_BLKLEN: if (buffer || (! nbytes)) rc = GPG_ERR_CIPHER_ALGO; else { ui = cipher_get_blocksize (algo); if ((ui > 0) && (ui < 10000)) *nbytes = ui; else { /* The only reason is an invalid algo or a strange blocksize. */ rc = GPG_ERR_CIPHER_ALGO; } } break; case GCRYCTL_TEST_ALGO: if (buffer || nbytes) rc = GPG_ERR_INV_ARG; else rc = check_cipher_algo (algo); break; default: rc = GPG_ERR_INV_OP; } return rc; } /* This function returns length of the key for algorithm ALGO. If the algorithm supports multiple key lengths, the maximum supported key length is returned. On error 0 is returned. The key length is returned as number of octets. This is a convenience functions which should be preferred over gcry_cipher_algo_info because it allows for proper type checking. */ size_t _gcry_cipher_get_algo_keylen (int algo) { size_t n; if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n)) n = 0; return n; } /* This functions returns the blocklength of the algorithm ALGO counted in octets. On error 0 is returned. This is a convenience functions which should be preferred over gcry_cipher_algo_info because it allows for proper type checking. */ size_t _gcry_cipher_get_algo_blklen (int algo) { size_t n; if (_gcry_cipher_algo_info( algo, GCRYCTL_GET_BLKLEN, NULL, &n)) n = 0; return n; } /* Explicitly initialize this module. */ gcry_err_code_t _gcry_cipher_init (void) { if (fips_mode()) { /* disable algorithms that are disallowed in fips */ int idx; gcry_cipher_spec_t *spec; for (idx = 0; (spec = cipher_list[idx]); idx++) if (!spec->flags.fips) spec->flags.disabled = 1; } return 0; } /* Run the selftests for cipher algorithm ALGO with optional reporting function REPORT. */ gpg_error_t _gcry_cipher_selftest (int algo, int extended, selftest_report_func_t report) { gcry_err_code_t ec = 0; gcry_cipher_spec_t *spec; spec = spec_from_algo (algo); if (spec && !spec->flags.disabled && spec->selftest) ec = spec->selftest (algo, extended, report); else { ec = GPG_ERR_CIPHER_ALGO; if (report) report ("cipher", algo, "module", (spec && !spec->flags.disabled)? "no selftest available" : spec? "algorithm disabled" : "algorithm not found"); } return gpg_error (ec); } diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 3d323cf0..50a0745b 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -1,3010 +1,3301 @@ /* AES-NI accelerated AES for Libgcrypt * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AESNI #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif typedef struct u128_s { u32 a, b, c, d; } __attribute__((packed, aligned(1), may_alias)) u128_t; /* Two macros to be called prior and after the use of AESNI instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the SSE regsiters are cleared and won't reveal any information about the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. */ # define aesni_prepare_2_6_variable char win64tmp[16] # define aesni_prepare_7_15_variable char win64tmp7_15[16 * 9] # define aesni_prepare() do { } while (0) # define aesni_prepare_2_6() \ do { asm volatile ("movdqu %%xmm6, %0\n\t" \ : "=m" (*win64tmp) \ : \ : "memory"); \ } while (0) # define aesni_prepare_7_15() \ do { asm volatile ("movdqu %%xmm7, 0*16(%0)\n\t" \ "movdqu %%xmm8, 1*16(%0)\n\t" \ "movdqu %%xmm9, 2*16(%0)\n\t" \ "movdqu %%xmm10, 3*16(%0)\n\t" \ "movdqu %%xmm11, 4*16(%0)\n\t" \ "movdqu %%xmm12, 5*16(%0)\n\t" \ "movdqu %%xmm13, 6*16(%0)\n\t" \ "movdqu %%xmm14, 7*16(%0)\n\t" \ "movdqu %%xmm15, 8*16(%0)\n\t" \ : \ : "r" (win64tmp7_15) \ : "memory"); \ } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) # define aesni_cleanup_2_6() \ do { asm volatile ("movdqu %0, %%xmm6\n\t" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ : \ : "m" (*win64tmp) \ : "memory"); \ } while (0) # define aesni_cleanup_7_15() \ do { asm volatile ("movdqu 0*16(%0), %%xmm7\n\t" \ "movdqu 1*16(%0), %%xmm8\n\t" \ "movdqu 2*16(%0), %%xmm9\n\t" \ "movdqu 3*16(%0), %%xmm10\n\t" \ "movdqu 4*16(%0), %%xmm11\n\t" \ "movdqu 5*16(%0), %%xmm12\n\t" \ "movdqu 6*16(%0), %%xmm13\n\t" \ "movdqu 7*16(%0), %%xmm14\n\t" \ "movdqu 8*16(%0), %%xmm15\n\t" \ : \ : "r" (win64tmp7_15) \ : "memory"); \ } while (0) #else # define aesni_prepare_2_6_variable # define aesni_prepare() do { } while (0) # define aesni_prepare_2_6() do { } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) # define aesni_cleanup_2_6() \ do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n":: ); \ } while (0) # ifdef __x86_64__ # define aesni_prepare_7_15_variable # define aesni_prepare_7_15() do { } while (0) # define aesni_cleanup_7_15() \ do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ "pxor %%xmm8, %%xmm8\n" \ "pxor %%xmm9, %%xmm9\n" \ "pxor %%xmm10, %%xmm10\n" \ "pxor %%xmm11, %%xmm11\n" \ "pxor %%xmm12, %%xmm12\n" \ "pxor %%xmm13, %%xmm13\n" \ "pxor %%xmm14, %%xmm14\n" \ "pxor %%xmm15, %%xmm15\n":: ); \ } while (0) # endif #endif void _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) { aesni_prepare_2_6_variable; aesni_prepare(); aesni_prepare_2_6(); if (ctx->rounds < 12) { /* 128-bit key */ #define AESKEYGENASSIST_xmm1_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" #define AESKEY_EXPAND128 \ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm1, %%xmm3\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm1\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm1\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm2\n\t" \ "pxor %%xmm2, %%xmm1\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x01) AESKEY_EXPAND128 "movdqa %%xmm1, 0x10(%[ksch])\n\t" /* ksch[1] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x02) AESKEY_EXPAND128 "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x04) AESKEY_EXPAND128 "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x08) AESKEY_EXPAND128 "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x10) AESKEY_EXPAND128 "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x20) AESKEY_EXPAND128 "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x40) AESKEY_EXPAND128 "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x80) AESKEY_EXPAND128 "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x1b) AESKEY_EXPAND128 "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x36) AESKEY_EXPAND128 "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm1_xmm2 #undef AESKEY_EXPAND128 } else if (ctx->rounds == 12) { /* 192-bit key */ #define AESKEYGENASSIST_xmm3_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" #define AESKEY_EXPAND192 \ "pshufd $0x55, %%xmm2, %%xmm2\n\t" \ "movdqu %%xmm1, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pxor %%xmm2, %%xmm1\n\t" \ "pshufd $0xff, %%xmm1, %%xmm2\n\t" \ "movdqu %%xmm3, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pxor %%xmm2, %%xmm3\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ "movq 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..23] */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x01) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x10(%[ksch])\n\t" /* ksch[1] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x20(%[ksch])\n\t" /* ksch[2] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x02) AESKEY_EXPAND192 "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x04) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x40(%[ksch])\n\t" /* ksch[4] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x50(%[ksch])\n\t" /* ksch[5] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x08) AESKEY_EXPAND192 "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x10) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x70(%[ksch])\n\t" /* ksch[7] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x80(%[ksch])\n\t" /* ksch[8] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x20) AESKEY_EXPAND192 "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x40) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x80) AESKEY_EXPAND192 "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm3_xmm2 #undef AESKEY_EXPAND192 } else if (ctx->rounds > 12) { /* 256-bit key */ #define AESKEYGENASSIST_xmm1_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" #define AESKEYGENASSIST_xmm3_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" #define AESKEY_EXPAND256_A \ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm1, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pxor %%xmm2, %%xmm1\n\t" #define AESKEY_EXPAND256_B \ "pshufd $0xaa, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm3, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pxor %%xmm2, %%xmm3\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ "movdqu 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..31] */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ "movdqa %%xmm3, 0x10(%[ksch])\n\t" /* ksch[1] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x01) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x30(%[ksch])\n\t" /* ksch[3] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x02) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x50(%[ksch])\n\t" /* ksch[5] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x04) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x70(%[ksch])\n\t" /* ksch[7] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x08) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x90(%[ksch])\n\t" /* ksch[9] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x10) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x20) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0xd0(%[ksch])\n\t" /* ksch[13] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x40) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xe0(%[ksch])\n\t" /* ksch[14] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm1_xmm2 #undef AESKEYGENASSIST_xmm3_xmm2 #undef AESKEY_EXPAND256_A #undef AESKEY_EXPAND256_B } aesni_cleanup(); aesni_cleanup_2_6(); } /* Make a decryption key from an encryption key. */ void _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) { /* The AES-NI decrypt instructions use the Equivalent Inverse Cipher, thus we can't use the the standard decrypt key preparation. */ u128_t *ekey = (u128_t *)ctx->keyschenc; u128_t *dkey = (u128_t *)ctx->keyschdec; int rr; int r; aesni_prepare(); #define DO_AESNI_AESIMC() \ asm volatile ("movdqa %[ekey], %%xmm1\n\t" \ /*"aesimc %%xmm1, %%xmm1\n\t"*/ \ ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" \ "movdqa %%xmm1, %[dkey]" \ : [dkey] "=m" (dkey[r]) \ : [ekey] "m" (ekey[rr]) \ : "memory") dkey[0] = ekey[ctx->rounds]; r=1; rr=ctx->rounds-1; DO_AESNI_AESIMC(); r++; rr--; /* round 1 */ DO_AESNI_AESIMC(); r++; rr--; /* round 2 */ DO_AESNI_AESIMC(); r++; rr--; /* round 3 */ DO_AESNI_AESIMC(); r++; rr--; /* round 4 */ DO_AESNI_AESIMC(); r++; rr--; /* round 5 */ DO_AESNI_AESIMC(); r++; rr--; /* round 6 */ DO_AESNI_AESIMC(); r++; rr--; /* round 7 */ DO_AESNI_AESIMC(); r++; rr--; /* round 8 */ DO_AESNI_AESIMC(); r++; rr--; /* round 9 */ if (ctx->rounds > 10) { DO_AESNI_AESIMC(); r++; rr--; /* round 10 */ DO_AESNI_AESIMC(); r++; rr--; /* round 11 */ if (ctx->rounds > 12) { DO_AESNI_AESIMC(); r++; rr--; /* round 12 */ DO_AESNI_AESIMC(); r++; rr--; /* round 13 */ } } dkey[r] = ekey[0]; #undef DO_AESNI_AESIMC aesni_cleanup(); } /* Encrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ static inline void do_aesni_enc (const RIJNDAEL_context *ctx) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" asm volatile ("movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 "\n" : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm1_xmm0 #undef aesenclast_xmm1_xmm0 } /* Decrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ static inline void do_aesni_dec (const RIJNDAEL_context *ctx) { #define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" #define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t" asm volatile ("movdqa (%[key]), %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Ldeclast%=:\n\t" aesdeclast_xmm1_xmm0 "\n" : : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesdec_xmm1_xmm0 #undef aesdeclast_xmm1_xmm0 } /* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ static inline void do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) { #define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t" #define aesenc_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd0\n\t" #define aesenc_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd8\n\t" #define aesenc_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe0\n\t" #define aesenclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc8\n\t" #define aesenclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd0\n\t" #define aesenclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd8\n\t" #define aesenclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe0\n\t" asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x20(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x30(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x40(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x50(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x60(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x70(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x80(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x90(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xa0(%[key]), %%xmm0\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xb0(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xc0(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xd0(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" aesenclast_xmm0_xmm1 aesenclast_xmm0_xmm2 aesenclast_xmm0_xmm3 aesenclast_xmm0_xmm4 : /* no output */ : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm0_xmm1 #undef aesenc_xmm0_xmm2 #undef aesenc_xmm0_xmm3 #undef aesenc_xmm0_xmm4 #undef aesenclast_xmm0_xmm1 #undef aesenclast_xmm0_xmm2 #undef aesenclast_xmm0_xmm3 #undef aesenclast_xmm0_xmm4 } /* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ static inline void do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) { #define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t" #define aesdec_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd0\n\t" #define aesdec_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd8\n\t" #define aesdec_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xde, 0xe0\n\t" #define aesdeclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc8\n\t" #define aesdeclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd0\n\t" #define aesdeclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd8\n\t" #define aesdeclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xe0\n\t" asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x20(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x30(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x40(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x50(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x60(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x70(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x80(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x90(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xa0(%[key]), %%xmm0\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xb0(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xc0(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xd0(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" aesdeclast_xmm0_xmm1 aesdeclast_xmm0_xmm2 aesdeclast_xmm0_xmm3 aesdeclast_xmm0_xmm4 : /* no output */ : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesdec_xmm0_xmm1 #undef aesdec_xmm0_xmm2 #undef aesdec_xmm0_xmm3 #undef aesdec_xmm0_xmm4 #undef aesdeclast_xmm0_xmm1 #undef aesdeclast_xmm0_xmm2 #undef aesdeclast_xmm0_xmm3 #undef aesdeclast_xmm0_xmm4 } #ifdef __x86_64__ /* Encrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ static inline void do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "jb .Ldeclast%=\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "je .Ldeclast%=\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" "aesenclast %%xmm0, %%xmm1\n\t" "aesenclast %%xmm0, %%xmm2\n\t" "aesenclast %%xmm0, %%xmm3\n\t" "aesenclast %%xmm0, %%xmm4\n\t" "aesenclast %%xmm0, %%xmm8\n\t" "aesenclast %%xmm0, %%xmm9\n\t" "aesenclast %%xmm0, %%xmm10\n\t" "aesenclast %%xmm0, %%xmm11\n\t" : /* no output */ : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); } /* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ static inline void do_aesni_dec_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "jb .Ldeclast%=\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "je .Ldeclast%=\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" "aesdeclast %%xmm0, %%xmm1\n\t" "aesdeclast %%xmm0, %%xmm2\n\t" "aesdeclast %%xmm0, %%xmm3\n\t" "aesdeclast %%xmm0, %%xmm4\n\t" "aesdeclast %%xmm0, %%xmm8\n\t" "aesdeclast %%xmm0, %%xmm9\n\t" "aesdeclast %%xmm0, %%xmm10\n\t" "aesdeclast %%xmm0, %%xmm11\n\t" : /* no output */ : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); } #endif /* __x86_64__ */ /* Perform a CTR encryption round using the counter CTR and the input block A. Write the result to the output block B and update CTR. CTR needs to be a 16 byte aligned little-endian value. */ static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" asm volatile ("movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm5\n\t" "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ (big endian) */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "cmpl $0xffffffff, 12(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "psubq %%xmm1, %%xmm5\n\t" /* add carry to upper 64bits */ ".Lno_carry%=:\n\t" "pshufb %%xmm6, %%xmm5\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ "pxor (%[key]), %%xmm0\n\t" /* xmm1 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ : [dst] "=m" (*b) : [src] "m" (*a), [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [rounds] "g" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm1_xmm0 #undef aesenclast_xmm1_xmm0 } /* Four blocks at a time variant of do_aesni_ctr. */ static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 } }; const void *bige_addb = bige_addb_const; #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" #define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" #define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" #define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" #define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" #define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" /* Register usage: [key] keyschedule xmm0 CTR-0 xmm1 temp / round key xmm2 CTR-1 xmm3 CTR-2 xmm4 CTR-3 xmm5 copy of *ctr xmm6 endian swapping mask */ asm volatile (/* detect if 8-bit carry handling is needed */ "cmpb $0xfb, 15(%[ctr])\n\t" "ja .Ladd32bit%=\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */ "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */ "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */ "movdqa 3*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(4) */ "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */ "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */ "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */ "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(4) + CTR (xmm0) */ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "jmp .Lstore_ctr%=\n\t" ".Ladd32bit%=:\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */ "movdqa %%xmm0, %%xmm2\n\t" "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */ "movdqa %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "movl 12(%[ctr]), %%esi\n\t" "bswapl %%esi\n\t" "cmpl $0xfffffffc, %%esi\n\t" "jb .Lno_carry%=\n\t" /* no carry */ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffffc */ "cmpl $0xfffffffe, %%esi\n\t" "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */ /* esi == 0xffffffff */ "psubq %%xmm1, %%xmm2\n\t" ".Lcarry_xmm3%=:\n\t" "psubq %%xmm1, %%xmm3\n\t" ".Lcarry_xmm4%=:\n\t" "psubq %%xmm1, %%xmm4\n\t" ".Lcarry_xmm5%=:\n\t" "psubq %%xmm1, %%xmm5\n\t" ".Lno_carry%=:\n\t" "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */ ".Lstore_ctr%=:\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ : : [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [addb] "r" (bige_addb) : "%esi", "cc", "memory"); asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 aesenclast_xmm1_xmm2 aesenclast_xmm1_xmm3 aesenclast_xmm1_xmm4 : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("movdqu (%[src]), %%xmm1\n\t" /* Get block 1. */ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ "movdqu %%xmm0, (%[dst])\n\t" /* Store block 1 */ "movdqu 16(%[src]), %%xmm1\n\t" /* Get block 2. */ "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ "movdqu %%xmm2, 16(%[dst])\n\t" /* Store block 2. */ "movdqu 32(%[src]), %%xmm1\n\t" /* Get block 3. */ "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ "movdqu %%xmm3, 32(%[dst])\n\t" /* Store block 3. */ "movdqu 48(%[src]), %%xmm1\n\t" /* Get block 4. */ "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ "movdqu %%xmm4, 48(%[dst])" /* Store block 4. */ : : [src] "r" (a), [dst] "r" (b) : "memory"); #undef aesenc_xmm1_xmm0 #undef aesenc_xmm1_xmm2 #undef aesenc_xmm1_xmm3 #undef aesenc_xmm1_xmm4 #undef aesenclast_xmm1_xmm0 #undef aesenclast_xmm1_xmm2 #undef aesenclast_xmm1_xmm3 #undef aesenclast_xmm1_xmm4 } #ifdef __x86_64__ /* Eight blocks at a time variant of do_aesni_ctr. */ static void do_aesni_ctr_8 (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { static const byte bige_addb_const[8][16] __attribute__ ((aligned (16))) = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 } }; const void *bige_addb = bige_addb_const; /* Register usage: [key] keyschedule xmm0 CTR-0 xmm1 temp / round key xmm2 CTR-1 xmm3 CTR-2 xmm4 CTR-3 xmm5 copy of *ctr xmm6 endian swapping mask xmm8 CTR-4 xmm9 CTR-5 xmm10 CTR-6 xmm11 CTR-7 xmm12 temp xmm13 temp xmm14 temp xmm15 temp */ asm volatile (/* detect if 8-bit carry handling is needed */ "cmpb $0xf7, 15(%[ctr])\n\t" "ja .Ladd32bit%=\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */ "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */ "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */ "movdqa 3*16(%[addb]), %%xmm8\n\t" /* xmm8 := be(4) */ "movdqa 4*16(%[addb]), %%xmm9\n\t" /* xmm9 := be(5) */ "movdqa 5*16(%[addb]), %%xmm10\n\t" /* xmm10 := be(6) */ "movdqa 6*16(%[addb]), %%xmm11\n\t" /* xmm11 := be(7) */ "movdqa 7*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(8) */ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */ "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */ "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */ "paddb %%xmm0, %%xmm8\n\t" /* xmm8 := be(4) + CTR (xmm0) */ "paddb %%xmm0, %%xmm9\n\t" /* xmm9 := be(5) + CTR (xmm0) */ "paddb %%xmm0, %%xmm10\n\t" /* xmm10 := be(6) + CTR (xmm0) */ "paddb %%xmm0, %%xmm11\n\t" /* xmm11 := be(7) + CTR (xmm0) */ "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(8) + CTR (xmm0) */ "jmp .Lstore_ctr%=\n\t" ".Ladd32bit%=:\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */ "movdqa %%xmm0, %%xmm2\n\t" "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */ "movdqa %%xmm4, %%xmm8\n\t" /* xmm8 := xmm4 */ "psubq %%xmm1, %%xmm8\n\t" /* xmm8++ */ "movdqa %%xmm8, %%xmm9\n\t" /* xmm9 := xmm8 */ "psubq %%xmm1, %%xmm9\n\t" /* xmm9++ */ "movdqa %%xmm9, %%xmm10\n\t" /* xmm10 := xmm9 */ "psubq %%xmm1, %%xmm10\n\t" /* xmm10++ */ "movdqa %%xmm10, %%xmm11\n\t" /* xmm11 := xmm10 */ "psubq %%xmm1, %%xmm11\n\t" /* xmm11++ */ "movdqa %%xmm11, %%xmm5\n\t" /* xmm5 := xmm11 */ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "movl 12(%[ctr]), %%esi\n\t" "bswapl %%esi\n\t" "cmpl $0xfffffff8, %%esi\n\t" "jb .Lno_carry%=\n\t" /* no carry */ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffff8 */ "cmpl $0xfffffffa, %%esi\n\t" "jb .Lcarry_xmm11%=\n\t" /* esi == 0xfffffff9 */ "je .Lcarry_xmm10%=\n\t" /* esi == 0xfffffffa */ "cmpl $0xfffffffc, %%esi\n\t" "jb .Lcarry_xmm9%=\n\t" /* esi == 0xfffffffb */ "je .Lcarry_xmm8%=\n\t" /* esi == 0xfffffffc */ "cmpl $0xfffffffe, %%esi\n\t" "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */ /* esi == 0xffffffff */ "psubq %%xmm1, %%xmm2\n\t" ".Lcarry_xmm3%=:\n\t" "psubq %%xmm1, %%xmm3\n\t" ".Lcarry_xmm4%=:\n\t" "psubq %%xmm1, %%xmm4\n\t" ".Lcarry_xmm8%=:\n\t" "psubq %%xmm1, %%xmm8\n\t" ".Lcarry_xmm9%=:\n\t" "psubq %%xmm1, %%xmm9\n\t" ".Lcarry_xmm10%=:\n\t" "psubq %%xmm1, %%xmm10\n\t" ".Lcarry_xmm11%=:\n\t" "psubq %%xmm1, %%xmm11\n\t" ".Lcarry_xmm5%=:\n\t" "psubq %%xmm1, %%xmm5\n\t" ".Lno_carry%=:\n\t" "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */ "pshufb %%xmm6, %%xmm8\n\t" /* xmm8 := be(xmm8) */ "pshufb %%xmm6, %%xmm9\n\t" /* xmm9 := be(xmm9) */ "pshufb %%xmm6, %%xmm10\n\t" /* xmm10 := be(xmm10) */ "pshufb %%xmm6, %%xmm11\n\t" /* xmm11 := be(xmm11) */ ".Lstore_ctr%=:\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ : : [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [addb] "r" (bige_addb) : "%esi", "cc", "memory"); asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm1, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm1, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm1, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm1, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm1\n\t" "jb .Lenclast%=\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm1\n\t" "je .Lenclast%=\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" "aesenclast %%xmm1, %%xmm0\n\t" "aesenclast %%xmm1, %%xmm2\n\t" "aesenclast %%xmm1, %%xmm3\n\t" "aesenclast %%xmm1, %%xmm4\n\t" "aesenclast %%xmm1, %%xmm8\n\t" "aesenclast %%xmm1, %%xmm9\n\t" "aesenclast %%xmm1, %%xmm10\n\t" "aesenclast %%xmm1, %%xmm11\n\t" : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("movdqu 0*16(%[src]), %%xmm12\n\t" /* Get block 1. */ "movdqu 1*16(%[src]), %%xmm13\n\t" /* Get block 2. */ "movdqu 2*16(%[src]), %%xmm14\n\t" /* Get block 3. */ "movdqu 3*16(%[src]), %%xmm15\n\t" /* Get block 4. */ "movdqu 4*16(%[src]), %%xmm1\n\t" /* Get block 5. */ "pxor %%xmm12, %%xmm0\n\t" /* EncCTR-1 ^= input */ "movdqu 5*16(%[src]), %%xmm12\n\t" /* Get block 6. */ "pxor %%xmm13, %%xmm2\n\t" /* EncCTR-2 ^= input */ "movdqu 6*16(%[src]), %%xmm13\n\t" /* Get block 7. */ "pxor %%xmm14, %%xmm3\n\t" /* EncCTR-3 ^= input */ "movdqu 7*16(%[src]), %%xmm14\n\t" /* Get block 8. */ "pxor %%xmm15, %%xmm4\n\t" /* EncCTR-4 ^= input */ "movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */ "pxor %%xmm1, %%xmm8\n\t" /* EncCTR-5 ^= input */ "movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */ "pxor %%xmm12, %%xmm9\n\t" /* EncCTR-6 ^= input */ "movdqu %%xmm2, 1*16(%[dst])\n\t" /* Store block 2. */ "pxor %%xmm13, %%xmm10\n\t" /* EncCTR-7 ^= input */ "movdqu %%xmm3, 2*16(%[dst])\n\t" /* Store block 3. */ "pxor %%xmm14, %%xmm11\n\t" /* EncCTR-8 ^= input */ "movdqu %%xmm4, 3*16(%[dst])\n\t" /* Store block 4. */ "movdqu %%xmm8, 4*16(%[dst])\n\t" /* Store block 8. */ "movdqu %%xmm9, 5*16(%[dst])\n\t" /* Store block 9. */ "movdqu %%xmm10, 6*16(%[dst])\n\t" /* Store block 10. */ "movdqu %%xmm11, 7*16(%[dst])\n\t" /* Store block 11. */ : : [src] "r" (a), [dst] "r" (b) : "memory"); } #endif /* __x86_64__ */ unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { aesni_prepare (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_aesni_enc (ctx); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); aesni_cleanup (); return 0; } void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks) { aesni_prepare (); asm volatile ("movdqu %[iv], %%xmm0\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { do_aesni_enc (ctx); asm volatile ("movdqu %[inbuf], %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); } void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac) { aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6(); asm volatile ("movdqu %[iv], %%xmm5\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" : /* No output */ : [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("movdqa %%xmm0, %%xmm5\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_6 (); } void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6(); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ : /* No output */ : [mask] "m" (*be_mask), [ctr] "m" (*ctr) : "memory"); #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { do_aesni_ctr_8 (ctx, ctr, outbuf, inbuf); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { do_aesni_ctr (ctx, ctr, outbuf, inbuf); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } aesni_cleanup (); aesni_cleanup_2_6 (); } unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { aesni_prepare (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_aesni_dec (ctx); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); aesni_cleanup (); return 0; } void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks) { aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6(); asm volatile ("movdqu %[iv], %%xmm6\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); /* CFB decryption can be parallelized */ #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); for ( ;nblocks >= 8; nblocks -= 8) { asm volatile ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */ "movdqu 0*16(%[inbuf]), %%xmm2\n\t" "movdqu 1*16(%[inbuf]), %%xmm3\n\t" "movdqu 2*16(%[inbuf]), %%xmm4\n\t" "movdqu 3*16(%[inbuf]), %%xmm8\n\t" "movdqu 4*16(%[inbuf]), %%xmm9\n\t" "movdqu 5*16(%[inbuf]), %%xmm10\n\t" "movdqu 6*16(%[inbuf]), %%xmm11\n\t" "movdqu 7*16(%[inbuf]), %%xmm6\n\t" /* update IV */ "movdqa %%xmm2, %%xmm12\n\t" "movdqa %%xmm3, %%xmm13\n\t" "movdqa %%xmm4, %%xmm14\n\t" "movdqa %%xmm8, %%xmm15\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_enc_vec8 (ctx); asm volatile ( "pxor %%xmm12, %%xmm1\n\t" "movdqu 4*16(%[inbuf]), %%xmm12\n\t" "pxor %%xmm13, %%xmm2\n\t" "movdqu 5*16(%[inbuf]), %%xmm13\n\t" "pxor %%xmm14, %%xmm3\n\t" "movdqu 6*16(%[inbuf]), %%xmm14\n\t" "pxor %%xmm15, %%xmm4\n\t" "movdqu 7*16(%[inbuf]), %%xmm15\n\t" "pxor %%xmm12, %%xmm8\n\t" "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm13, %%xmm9\n\t" "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm14, %%xmm10\n\t" "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "pxor %%xmm15, %%xmm11\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" "movdqu %%xmm8, 4*16(%[outbuf])\n\t" "movdqu %%xmm9, 5*16(%[outbuf])\n\t" "movdqu %%xmm10, 6*16(%[outbuf])\n\t" "movdqu %%xmm11, 7*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4; nblocks -= 4) { asm volatile ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */ "movdqu 0*16(%[inbuf]), %%xmm2\n\t" "movdqu 1*16(%[inbuf]), %%xmm3\n\t" "movdqu 2*16(%[inbuf]), %%xmm4\n\t" "movdqu 3*16(%[inbuf]), %%xmm6\n\t" /* update IV */ : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_enc_vec4 (ctx); asm volatile ("movdqu 0*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "movdqu 1*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "movdqu 2*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "movdqu 3*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } asm volatile ("movdqu %%xmm6, %%xmm0\n\t" ::: "cc"); for ( ;nblocks; nblocks-- ) { do_aesni_enc (ctx); asm volatile ("movdqa %%xmm0, %%xmm6\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm6, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_6 (); } void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks) { aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6(); asm volatile ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */ : /* No output */ : [iv] "m" (*iv) : "memory"); #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { asm volatile ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */ "movdqu 1*16(%[inbuf]), %%xmm2\n\t" "movdqu 2*16(%[inbuf]), %%xmm3\n\t" "movdqu 3*16(%[inbuf]), %%xmm4\n\t" "movdqu 4*16(%[inbuf]), %%xmm8\n\t" "movdqu 5*16(%[inbuf]), %%xmm9\n\t" "movdqu 6*16(%[inbuf]), %%xmm10\n\t" "movdqu 7*16(%[inbuf]), %%xmm11\n\t" "movdqa %%xmm1, %%xmm12\n\t" "movdqa %%xmm2, %%xmm13\n\t" "movdqa %%xmm3, %%xmm14\n\t" "movdqa %%xmm4, %%xmm15\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_dec_vec8 (ctx); asm volatile ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */ "pxor %%xmm12, %%xmm2\n\t" /* xor IV with output */ "movdqu 4*16(%[inbuf]), %%xmm12\n\t" "pxor %%xmm13, %%xmm3\n\t" /* xor IV with output */ "movdqu 5*16(%[inbuf]), %%xmm13\n\t" "pxor %%xmm14, %%xmm4\n\t" /* xor IV with output */ "movdqu 6*16(%[inbuf]), %%xmm14\n\t" "pxor %%xmm15, %%xmm8\n\t" /* xor IV with output */ "movdqu 7*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm12, %%xmm9\n\t" /* xor IV with output */ "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm13, %%xmm10\n\t" /* xor IV with output */ "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm14, %%xmm11\n\t" /* xor IV with output */ "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" "movdqu %%xmm8, 4*16(%[outbuf])\n\t" "movdqu %%xmm9, 5*16(%[outbuf])\n\t" "movdqu %%xmm10, 6*16(%[outbuf])\n\t" "movdqu %%xmm11, 7*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { asm volatile ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */ "movdqu 1*16(%[inbuf]), %%xmm2\n\t" "movdqu 2*16(%[inbuf]), %%xmm3\n\t" "movdqu 3*16(%[inbuf]), %%xmm4\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_dec_vec4 (ctx); asm volatile ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */ "movdqu 0*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm2\n\t" /* xor IV with output */ "movdqu 1*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm3\n\t" /* xor IV with output */ "movdqu 2*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm4\n\t" /* xor IV with output */ "movdqu 3*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm4, 3*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "movdqa %%xmm0, %%xmm2\n\t" /* use xmm2 as savebuf */ : /* No output */ : [inbuf] "m" (*inbuf) : "memory"); /* uses only xmm0 and xmm1 */ do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" /* xor IV with output */ "movdqu %%xmm0, %[outbuf]\n\t" "movdqu %%xmm2, %%xmm5\n\t" /* store savebuf as new IV */ : [outbuf] "=m" (*outbuf) : : "memory"); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[iv]\n\t" /* store IV */ : /* No output */ : [iv] "m" (*iv) : "memory"); aesni_cleanup (); aesni_cleanup_2_6 (); } static void aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6 (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv), [ctr] "m" (*c->u_ctr.ctr) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); asm volatile ("movdqu %[l0], %%xmm7\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l1], %%xmm10\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqa %%xmm5, %%xmm12\n\t" : : [l1] "m" (*c->u_mode.ocb.L[1]), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm10, %%xmm5\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqa %%xmm5, %%xmm13\n\t" : : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqa %%xmm5, %%xmm14\n\t" : : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm15\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm15, %%xmm5\n\t" "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" : : [l3] "m" (*l), [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); n += 4; l = ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm8, %%xmm6\n\t" "pxor %%xmm5, %%xmm8\n\t" "movdqu %%xmm5, %[outbuf4]\n\t" : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" "pxor %%xmm10, %%xmm5\n\t" "pxor %%xmm9, %%xmm6\n\t" "pxor %%xmm5, %%xmm9\n\t" "movdqu %%xmm5, %[outbuf5]\n\t" : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm10, %%xmm6\n\t" "pxor %%xmm5, %%xmm10\n\t" "movdqu %%xmm5, %[outbuf6]\n\t" : [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l7], %%xmm11\n\t" "pxor %%xmm11, %%xmm5\n\t" "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm11, %%xmm6\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec8 (ctx); asm volatile ("pxor %%xmm12, %%xmm1\n\t" "pxor %%xmm13, %%xmm2\n\t" "movdqu %[outbuf4],%%xmm0\n\t" "movdqu %[outbuf5],%%xmm12\n\t" "movdqu %[outbuf6],%%xmm13\n\t" "pxor %%xmm14, %%xmm3\n\t" "pxor %%xmm15, %%xmm4\n\t" "pxor %%xmm0, %%xmm8\n\t" "pxor %%xmm12, %%xmm9\n\t" "pxor %%xmm13, %%xmm10\n\t" "pxor %%xmm5, %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), [outbuf4] "+m" (*(outbuf + 4 * BLOCKSIZE)), [outbuf5] "+m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "+m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) : : "memory" ); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l0], %%xmm4\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm5, %[outbuf0]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) : [l0] "m" (*c->u_mode.ocb.L[0]), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm0\n\t" "movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm5, %[outbuf1]\n\t" : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) : [l1] "m" (*c->u_mode.ocb.L[1]), [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm5, %[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm4\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [l3] "m" (*l), [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %[outbuf1],%%xmm0\n\t" "pxor %%xmm0, %%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %[outbuf2],%%xmm0\n\t" "pxor %%xmm0, %%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) : : "memory" ); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_iv.iv), [ctr] "=m" (*c->u_ctr.ctr) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_6 (); } static void aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6 (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv), [ctr] "m" (*c->u_ctr.ctr) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); asm volatile ("movdqu %[l0], %%xmm7\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l1], %%xmm10\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqa %%xmm5, %%xmm12\n\t" : : [l1] "m" (*c->u_mode.ocb.L[1]), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm10, %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqa %%xmm5, %%xmm13\n\t" : : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqa %%xmm5, %%xmm14\n\t" : : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm0\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" : : [l3] "m" (*l), [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); n += 4; l = ocb_get_l(c, n); asm volatile ("movdqu %[inbuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm8\n\t" "movdqu %%xmm5, %[outbuf4]\n\t" : [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) : [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf5], %%xmm9\n\t" "pxor %%xmm10, %%xmm5\n\t" "pxor %%xmm5, %%xmm9\n\t" "movdqu %%xmm5, %[outbuf5]\n\t" : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)) : [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm10\n\t" "movdqu %%xmm5, %[outbuf6]\n\t" : [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)) : "memory" ); do_aesni_dec_vec8 (ctx); asm volatile ("pxor %%xmm12, %%xmm1\n\t" "pxor %%xmm13, %%xmm2\n\t" "movdqu %[outbuf4],%%xmm0\n\t" "movdqu %[outbuf5],%%xmm12\n\t" "movdqu %[outbuf6],%%xmm13\n\t" "pxor %%xmm14, %%xmm3\n\t" "pxor %%xmm15, %%xmm4\n\t" "pxor %%xmm0, %%xmm8\n\t" "pxor %%xmm12, %%xmm9\n\t" "pxor %%xmm13, %%xmm10\n\t" "pxor %%xmm5, %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" "pxor %%xmm2, %%xmm1\n\t" "pxor %%xmm4, %%xmm1\n\t" "pxor %%xmm9, %%xmm1\n\t" "pxor %%xmm11, %%xmm1\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm8, %%xmm6\n\t" "pxor %%xmm10, %%xmm6\n\t" "pxor %%xmm1, %%xmm6\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), [outbuf4] "+m" (*(outbuf + 4 * BLOCKSIZE)), [outbuf5] "+m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "+m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) : : "memory" ); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l0], %%xmm4\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm5, %[outbuf0]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) : [l0] "m" (*c->u_mode.ocb.L[0]), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm0\n\t" "movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm5, %[outbuf1]\n\t" : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) : [l1] "m" (*c->u_mode.ocb.L[1]), [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm5, %[outbuf2]\n\t" : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm0\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [l3] "m" (*l), [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_dec_vec4 (ctx); asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %[outbuf1],%%xmm0\n\t" "pxor %%xmm0, %%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %[outbuf2],%%xmm0\n\t" "pxor %%xmm0, %%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm4, %%xmm6\n\t" : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) : : "memory" ); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_iv.iv), [ctr] "=m" (*c->u_ctr.ctr) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_6 (); } void _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { if (encrypt) aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); } void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; const unsigned char *l; aesni_prepare_2_6_variable; aesni_prepare (); aesni_prepare_2_6 (); /* Preload Offset and Sum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_mode.ocb.aad_offset), [ctr] "m" (*c->u_mode.ocb.aad_sum) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[abuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [abuf] "m" (*abuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm0, %%xmm6\n\t" : : : "memory" ); abuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_7_15_variable; aesni_prepare_7_15(); asm volatile ("movdqu %[l0], %%xmm7\n\t" "movdqu %[l1], %%xmm12\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [l1] "m" (*c->u_mode.ocb.L[1]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[abuf0], %%xmm1\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" : : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[abuf1], %%xmm2\n\t" "pxor %%xmm12, %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" : : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[abuf2], %%xmm3\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" : : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm0\n\t" "movdqu %[abuf3], %%xmm4\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [l3] "m" (*l), [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) : "memory" ); n += 4; l = ocb_get_l(c, n); asm volatile ("movdqu %[abuf4], %%xmm8\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm8\n\t" : : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[abuf5], %%xmm9\n\t" "pxor %%xmm12, %%xmm5\n\t" "pxor %%xmm5, %%xmm9\n\t" : : [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[abuf6], %%xmm10\n\t" "pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm5, %%xmm10\n\t" : : [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" "movdqu %[abuf7], %%xmm11\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), [abuf7] "m" (*(abuf + 7 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec8 (ctx); asm volatile ("pxor %%xmm2, %%xmm1\n\t" "pxor %%xmm3, %%xmm1\n\t" "pxor %%xmm4, %%xmm1\n\t" "pxor %%xmm8, %%xmm1\n\t" "pxor %%xmm9, %%xmm6\n\t" "pxor %%xmm10, %%xmm6\n\t" "pxor %%xmm11, %%xmm6\n\t" "pxor %%xmm1, %%xmm6\n\t" : : : "memory" ); abuf += 8*BLOCKSIZE; } aesni_cleanup_7_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l0], %%xmm4\n\t" "movdqu %[abuf0], %%xmm1\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm0\n\t" "movdqu %[abuf1], %%xmm2\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" : : [l1] "m" (*c->u_mode.ocb.L[1]), [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[abuf2], %%xmm3\n\t" "pxor %%xmm4, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" : : [l2] "m" (*c->u_mode.ocb.L[0]), [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l3], %%xmm0\n\t" "movdqu %[abuf3], %%xmm4\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [l3] "m" (*l), [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); asm volatile ("pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm4, %%xmm6\n\t" : : : "memory" ); abuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[abuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [abuf] "m" (*abuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm0, %%xmm6\n\t" : : : "memory" ); abuf += BLOCKSIZE; } c->u_mode.ocb.aad_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_mode.ocb.aad_offset), [ctr] "=m" (*c->u_mode.ocb.aad_sum) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_6 (); } +static const u64 xts_gfmul_const[16] __attribute__ ((aligned (16))) = + { 0x87, 0x01 }; + + +static void +_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Tweak */ + asm volatile ("movdqu %[tweak], %%xmm5\n\t" + "movdqa %[gfmul], %%xmm6\n\t" + : + : [tweak] "m" (*tweak), + [gfmul] "m" (*xts_gfmul_const) + : "memory" ); + + for ( ;nblocks >= 4; nblocks -= 4 ) + { + asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * 16)) + : [inbuf0] "m" (*(inbuf + 0 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * 16)) + : [inbuf1] "m" (*(inbuf + 1 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * 16)) + : [inbuf2] "m" (*(inbuf + 2 * 16)) + : "memory" ); + + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm5, %[outbuf3]\n\t" + + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf3] "=m" (*(outbuf + 3 * 16)) + : [inbuf3] "m" (*(inbuf + 3 * 16)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %[outbuf1], %%xmm0\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf2], %%xmm1\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %[outbuf3], %%xmm0\n\t" + "pxor %%xmm1, %%xmm3\n\t" + "pxor %%xmm0, %%xmm4\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * 16)), + [outbuf1] "+m" (*(outbuf + 1 * 16)), + [outbuf2] "+m" (*(outbuf + 2 * 16)), + [outbuf3] "+m" (*(outbuf + 3 * 16)) + : + : "memory" ); + + outbuf += BLOCKSIZE * 4; + inbuf += BLOCKSIZE * 4; + } + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "movdqa %%xmm5, %%xmm4\n\t" + + "pshufd $0x13, %%xmm5, %%xmm1\n\t" + "psrad $31, %%xmm1\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm1\n\t" + "pxor %%xmm1, %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm5, %[tweak]\n\t" + : [tweak] "=m" (*tweak) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +static void +_gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks) +{ + aesni_prepare_2_6_variable; + + aesni_prepare (); + aesni_prepare_2_6 (); + + /* Preload Tweak */ + asm volatile ("movdqu %[tweak], %%xmm5\n\t" + "movdqa %[gfmul], %%xmm6\n\t" + : + : [tweak] "m" (*tweak), + [gfmul] "m" (*xts_gfmul_const) + : "memory" ); + + for ( ;nblocks >= 4; nblocks -= 4 ) + { + asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * 16)) + : [inbuf0] "m" (*(inbuf + 0 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * 16)) + : [inbuf1] "m" (*(inbuf + 1 * 16)) + : "memory" ); + + asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + + "movdqa %%xmm4, %%xmm0\n\t" + "paddd %%xmm4, %%xmm4\n\t" + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * 16)) + : [inbuf2] "m" (*(inbuf + 2 * 16)) + : "memory" ); + + asm volatile ("movdqa %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm5, %[outbuf3]\n\t" + + "psrad $31, %%xmm0\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm0\n\t" + "pxor %%xmm0, %%xmm5\n\t" + : [outbuf3] "=m" (*(outbuf + 3 * 16)) + : [inbuf3] "m" (*(inbuf + 3 * 16)) + : "memory" ); + + do_aesni_dec_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %[outbuf1], %%xmm0\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf2], %%xmm1\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %[outbuf3], %%xmm0\n\t" + "pxor %%xmm1, %%xmm3\n\t" + "pxor %%xmm0, %%xmm4\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * 16)), + [outbuf1] "+m" (*(outbuf + 1 * 16)), + [outbuf2] "+m" (*(outbuf + 2 * 16)), + [outbuf3] "+m" (*(outbuf + 3 * 16)) + : + : "memory" ); + + outbuf += BLOCKSIZE * 4; + inbuf += BLOCKSIZE * 4; + } + + for ( ;nblocks; nblocks-- ) + { + asm volatile ("movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "movdqa %%xmm5, %%xmm4\n\t" + + "pshufd $0x13, %%xmm5, %%xmm1\n\t" + "psrad $31, %%xmm1\n\t" + "paddq %%xmm5, %%xmm5\n\t" + "pand %%xmm6, %%xmm1\n\t" + "pxor %%xmm1, %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_dec (ctx); + + asm volatile ("pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + + asm volatile ("movdqu %%xmm5, %[tweak]\n\t" + : [tweak] "=m" (*tweak) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); +} + + +void +_gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak, + unsigned char *outbuf, const unsigned char *inbuf, + size_t nblocks, int encrypt) +{ + if (encrypt) + _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks); + else + _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks); +} + #endif /* USE_AESNI */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 8637195a..548bfa09 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,2022 +1,2106 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . ******************************************************************* * The code here is based on the optimized implementation taken from * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, * which carries this notice: *------------------------------------------ * rijndael-alg-fst.c v2.3 April '2000 * * Optimised ANSI C code * * authors: v1.0: Antoon Bosselaers * v2.0: Vincent Rijmen * v2.3: Paulo Barreto * * This code is placed in the public domain. *------------------------------------------ * * The SP800-38a document is available at: * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf * */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AMD64_ASM /* AMD64 assembly implementations of AES */ extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_AMD64_ASM*/ #ifdef USE_AESNI /* AES-NI (AMD64 & i386) accelerated implementations of AES */ extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); +extern void _gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, + unsigned char *tweak, + unsigned char *outbuf, + const unsigned char *inbuf, + size_t nblocks, int encrypt); #endif #ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); #endif #ifdef USE_ARM_ASM /* ARM assembly implementations of AES */ extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_ARM_ASM*/ #ifdef USE_ARM_CE /* ARMv8 Crypto Extension implementations of AES */ extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac); extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *ctr, size_t nblocks); extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif /*USE_ARM_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); /* All the numbers. */ #include "rijndael-tables.h" /* Function prototypes. */ static const char *selftest(void); /* Prefetching for encryption/decryption tables. */ static void prefetch_table(const volatile byte *tab, size_t len) { size_t i; for (i = 0; i < len; i += 8 * 32) { (void)tab[i + 0 * 32]; (void)tab[i + 1 * 32]; (void)tab[i + 2 * 32]; (void)tab[i + 3 * 32]; (void)tab[i + 4 * 32]; (void)tab[i + 5 * 32]; (void)tab[i + 6 * 32]; (void)tab[i + 7 * 32]; } (void)tab[len - 1]; } static void prefetch_enc(void) { prefetch_table((const void *)encT, sizeof(encT)); } static void prefetch_dec(void) { prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); } /* Perform the key setup. */ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) { static int initialized = 0; static const char *selftest_failed = 0; int rounds; int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) unsigned int hwfeatures; #endif /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a failed self-test won't get noticed in another thread. FIXME: We might want to have a central registry of succeeded self-tests. */ if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed ); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if( keylen == 128/8 ) { rounds = 10; KC = 4; } else if ( keylen == 192/8 ) { rounds = 12; KC = 6; } else if ( keylen == 256/8 ) { rounds = 14; KC = 8; } else return GPG_ERR_INV_KEYLEN; ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ || defined(USE_ARM_CE) hwfeatures = _gcry_get_hw_features (); #endif ctx->decryption_prepared = 0; #ifdef USE_PADLOCK ctx->use_padlock = 0; #endif #ifdef USE_AESNI ctx->use_aesni = 0; #endif #ifdef USE_SSSE3 ctx->use_ssse3 = 0; #endif #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif if (0) { ; } #ifdef USE_AESNI else if (hwfeatures & HWF_INTEL_AESNI) { ctx->encrypt_fn = _gcry_aes_aesni_encrypt; ctx->decrypt_fn = _gcry_aes_aesni_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_aesni = 1; } #endif #ifdef USE_PADLOCK else if (hwfeatures & HWF_PADLOCK_AES && keylen == 128/8) { ctx->encrypt_fn = _gcry_aes_padlock_encrypt; ctx->decrypt_fn = _gcry_aes_padlock_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_padlock = 1; memcpy (ctx->padlockkey, key, keylen); } #endif #ifdef USE_SSSE3 else if (hwfeatures & HWF_INTEL_SSSE3) { ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_ssse3 = 1; } #endif #ifdef USE_ARM_CE else if (hwfeatures & HWF_ARM_AES) { ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->use_arm_ce = 1; } #endif else { ctx->encrypt_fn = do_encrypt; ctx->decrypt_fn = do_decrypt; ctx->prefetch_enc_fn = prefetch_enc; ctx->prefetch_dec_fn = prefetch_dec; } /* NB: We don't yet support Padlock hardware key generation. */ if (0) { ; } #ifdef USE_AESNI else if (ctx->use_aesni) _gcry_aes_aesni_do_setkey (ctx, key); #endif #ifdef USE_SSSE3 else if (ctx->use_ssse3) _gcry_aes_ssse3_do_setkey (ctx, key); #endif #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); #endif else { const byte *sbox = ((const byte *)encT) + 1; union { PROPERLY_ALIGNED_TYPE dummy; byte data[MAXKC][4]; u32 data32[MAXKC]; } tkk[2]; #define k tkk[0].data #define k_u32 tkk[0].data32 #define tk tkk[1].data #define tk_u32 tkk[1].data32 #define W (ctx->keyschenc) #define W_u32 (ctx->keyschenc32) prefetch_enc(); for (i = 0; i < keylen; i++) { k[i >> 2][i & 3] = key[i]; } for (j = KC-1; j >= 0; j--) { tk_u32[j] = k_u32[j]; } r = 0; t = 0; /* Copy values into round key array. */ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } while (r < rounds + 1) { /* While not enough round key material calculated calculate new values. */ tk[0][0] ^= sbox[tk[KC-1][1] * 4]; tk[0][1] ^= sbox[tk[KC-1][2] * 4]; tk[0][2] ^= sbox[tk[KC-1][3] * 4]; tk[0][3] ^= sbox[tk[KC-1][0] * 4]; tk[0][0] ^= rcon[rconpointer++]; if (KC != 8) { for (j = 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } else { for (j = 1; j < KC/2; j++) { tk_u32[j] ^= tk_u32[j-1]; } tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4]; tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4]; tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4]; tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4]; for (j = KC/2 + 1; j < KC; j++) { tk_u32[j] ^= tk_u32[j-1]; } } /* Copy values into round key array. */ for (j = 0; (j < KC) && (r < rounds + 1); ) { for (; (j < KC) && (t < 4); j++, t++) { W_u32[r][t] = le_bswap32(tk_u32[j]); } if (t == 4) { r++; t = 0; } } } #undef W #undef tk #undef k #undef W_u32 #undef tk_u32 #undef k_u32 wipememory(&tkk, sizeof(tkk)); } return 0; } static gcry_err_code_t rijndael_setkey (void *context, const byte *key, const unsigned keylen) { RIJNDAEL_context *ctx = context; return do_setkey (ctx, key, keylen); } /* Make a decryption key from an encryption key. */ static void prepare_decryption( RIJNDAEL_context *ctx ) { int r; if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_prepare_decryption (ctx); } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_prepare_decryption (ctx); } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_prepare_decryption (ctx); } #endif /*USE_SSSE3*/ #ifdef USE_PADLOCK else if (ctx->use_padlock) { /* Padlock does not need decryption subkeys. */ } #endif /*USE_PADLOCK*/ else { const byte *sbox = ((const byte *)encT) + 1; prefetch_enc(); prefetch_dec(); ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; for (r = 1; r < ctx->rounds; r++) { u32 *wi = ctx->keyschenc32[r]; u32 *wo = ctx->keyschdec32[r]; u32 wt; wt = wi[0]; wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[1]; wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[2]; wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[3]; wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); } ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; } } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Encrypt one block. A and B may be the same. */ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschenc32) const byte *sbox = ((const byte *)encT) + 1; int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[0][0]; sa[1] = sb[1] ^ rk[0][1]; sa[2] = sb[2] ^ rk[0][2]; sa[3] = sb[3] ^ rk[0][3]; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; for (r = 2; r < rounds; r++) { sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r++; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } /* Last round is special. */ sb[0] = (sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8); sb[3] = (sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8); sb[2] = (sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8); sb[1] = (sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= (sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8); sa[0] ^= (sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8); sb[3] ^= (sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8); sb[2] ^= (sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= (sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8); sa[1] ^= (sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8); sa[0] ^= (sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8); sb[3] ^= (sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= (sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8); sa[2] ^= (sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8); sa[1] ^= (sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8); sa[0] ^= (sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8); sa[3] = rk[r][3] ^ sb[3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56 + 2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM # ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); # else /* Call SystemV ABI function without storing non-volatile XMM registers, * as target function does not use vector instruction sets. */ const void *key = ctx->keyschenc; uintptr_t rounds = ctx->rounds; uintptr_t ret; asm volatile ("movq %[encT], %%r8\n\t" "callq *%[ret]\n\t" : [ret] "=a" (ret), "+D" (key), "+S" (bx), "+d" (ax), "+c" (rounds) : "0" (_gcry_aes_amd64_encrypt_block), [encT] "r" (encT) : "cc", "memory", "r8", "r9", "r10", "r11"); return ret; # endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ } static unsigned int rijndael_encrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); return ctx->encrypt_fn (ctx, b, a); } /* Bulk encryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. */ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. */ buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char *last_iv; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; last_iv = iv; for ( ;nblocks; nblocks-- ) { buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); last_iv = outbuf; inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } if (last_iv != iv) buf_cpy (iv, last_iv, BLOCKSIZE); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CTR mode. Caller needs to make sure that CTR is aligned on a 16 byte boundary if AESNI; the minimum alignment is for an u32. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOCKSIZE. */ void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; int i; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. */ for (i = BLOCKSIZE; i > 0; i--) { ctr[i-1]++; if (ctr[i-1]) break; } } wipememory(&tmp, sizeof(tmp)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Decrypt one block. A and B may be the same. */ static unsigned int do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschdec32) int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[rounds][0]; sa[1] = sb[1] ^ rk[rounds][1]; sa[2] = sb[2] ^ rk[rounds][2]; sa[3] = sb[3] ^ rk[rounds][3]; for (r = rounds - 1; r > 1; r--) { sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r--; sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; /* Last round is special. */ sb[0] = inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8); sb[1] = inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8); sb[2] = inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8); sb[3] = inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8); sa[0] = sb[0] ^ rk[0][0]; sb[1] ^= inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8); sb[2] ^= inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8); sb[3] ^= inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8); sa[0] ^= inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8); sa[1] = sb[1] ^ rk[0][1]; sb[2] ^= inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8); sb[3] ^= inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8); sa[0] ^= inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8); sa[1] ^= inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8); sa[2] = sb[2] ^ rk[0][2]; sb[3] ^= inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8); sa[0] ^= inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8); sa[1] ^= inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8); sa[2] ^= inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8); sa[3] = sb[3] ^ rk[0][3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56+2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ /* Decrypt one block. AX and BX may be the same. */ static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM # ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, &dec_tables); # else /* Call SystemV ABI function without storing non-volatile XMM registers, * as target function does not use vector instruction sets. */ const void *key = ctx->keyschdec; uintptr_t rounds = ctx->rounds; uintptr_t ret; asm volatile ("movq %[dectabs], %%r8\n\t" "callq *%[ret]\n\t" : [ret] "=a" (ret), "+D" (key), "+S" (bx), "+d" (ax), "+c" (rounds) : "0" (_gcry_aes_amd64_decrypt_block), [dectabs] "r" (&dec_tables) : "cc", "memory", "r8", "r9", "r10", "r11"); return ret; # endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, &dec_tables); #else return do_decrypt_fn (ctx, bx, ax); #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ } static inline void check_decryption_preparation (RIJNDAEL_context *ctx) { if ( !ctx->decryption_prepared ) { prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } } static unsigned int rijndael_decrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); return ctx->decrypt_fn (ctx, b, a); } /* Bulk decryption of complete blocks in CFB mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { burn_depth = encrypt_fn (ctx, iv, iv); buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk decryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ burn_depth = decrypt_fn (ctx, savebuf, inbuf); buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } wipememory(savebuf, sizeof(savebuf)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption/decryption of complete blocks in OCB mode. */ size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; if (encrypt) { if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); } else { check_decryption_preparation (ctx); if (ctx->prefetch_dec_fn) ctx->prefetch_dec_fn(); } if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); burn_depth = 0; } #endif /*USE_ARM_CE*/ else if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.data_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); /* Checksum_i = Checksum_{i-1} xor P_i */ buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } /* Bulk authentication of complete blocks in OCB mode. */ size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned int burn_depth = 0; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); if (0) ; #ifdef USE_AESNI else if (ctx->use_aesni) { _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_AESNI*/ #ifdef USE_SSSE3 else if (ctx->use_ssse3) { _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_SSSE3*/ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) { _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); burn_depth = 0; } #endif /*USE_ARM_CE*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; for ( ;nblocks; nblocks-- ) { u64 i = ++c->u_mode.ocb.aad_nblocks; const unsigned char *l = ocb_get_l(c, i); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); abuf += BLOCKSIZE; } wipememory(&l_tmp, sizeof(l_tmp)); } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); return 0; } +/* Bulk encryption/decryption of complete blocks in XTS mode. */ +void +_gcry_aes_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + rijndael_cryptfn_t crypt_fn; + u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry; + + if (encrypt) + { + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + crypt_fn = ctx->encrypt_fn; + } + else + { + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + + crypt_fn = ctx->decrypt_fn; + } + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_AESNI*/ + else + { + tweak_next_lo = buf_get_le64 (tweak + 0); + tweak_next_hi = buf_get_le64 (tweak + 8); + + while (nblocks) + { + tweak_lo = tweak_next_lo; + tweak_hi = tweak_next_hi; + + /* Xor-Encrypt/Decrypt-Xor block. */ + tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo; + tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi; + + buf_put_le64 (outbuf + 0, tmp_lo); + buf_put_le64 (outbuf + 8, tmp_hi); + + /* Generate next tweak. */ + carry = -(tweak_next_hi >> 63) & 0x87; + tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63); + tweak_next_lo = (tweak_next_lo << 1) ^ carry; + + burn_depth = crypt_fn (ctx, outbuf, outbuf); + + buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo); + buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi); + + outbuf += GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + nblocks--; + } + + buf_put_le64 (tweak + 0, tweak_next_lo); + buf_put_le64 (tweak + 8, tweak_next_hi); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 5 * sizeof(void *)); +} + /* Run the self-tests for AES 128. Returns NULL on success. */ static const char* selftest_basic_128 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; /* The test vectors are from the AES supplied ones; more or less randomly taken from ecb_tbl.txt (I=42,81,14) */ #if 1 static const unsigned char plaintext_128[16] = { 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A }; static const unsigned char key_128[16] = { 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA }; static const unsigned char ciphertext_128[16] = { 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD }; #else /* Test vectors from fips-197, appendix C. */ # warning debug test vectors in use static const unsigned char plaintext_128[16] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff }; static const unsigned char key_128[16] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the stack, we need to allocate that context on the heap. */ ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_128, sizeof (key_128)); rijndael_encrypt (ctx, scratch, plaintext_128); if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) { xfree (ctxmem); return "AES-128 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; return NULL; } /* Run the self-tests for AES 192. Returns NULL on success. */ static const char* selftest_basic_192 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_192[16] = { 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 }; static unsigned char key_192[24] = { 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 }; static const unsigned char ciphertext_192[16] = { 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_192, sizeof(key_192)); rijndael_encrypt (ctx, scratch, plaintext_192); if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) { xfree (ctxmem); return "AES-192 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) return "AES-192 test decryption failed."; return NULL; } /* Run the self-tests for AES 256. Returns NULL on success. */ static const char* selftest_basic_256 (void) { RIJNDAEL_context *ctx; unsigned char *ctxmem; unsigned char scratch[16]; static unsigned char plaintext_256[16] = { 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 }; static unsigned char key_256[32] = { 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E }; static const unsigned char ciphertext_256[16] = { 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 }; ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem); if (!ctx) return "failed to allocate memory"; rijndael_setkey (ctx, key_256, sizeof(key_256)); rijndael_encrypt (ctx, scratch, plaintext_256); if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) { xfree (ctxmem); return "AES-256 test encryption failed."; } rijndael_decrypt (ctx, scratch, scratch); xfree (ctxmem); if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) return "AES-256 test decryption failed."; return NULL; } /* Run the self-tests for AES-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char* selftest_ctr_128 (void) { const int nblocks = 8+1; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_ctr("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_ctr_enc, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cbc_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cbc("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cbc_dec, nblocks, blocksize, context_size); } /* Run the self-tests for AES-CFB-128, tests bulk CFB decryption. Returns NULL on success. */ static const char* selftest_cfb_128 (void) { const int nblocks = 8+2; const int blocksize = BLOCKSIZE; const int context_size = sizeof(RIJNDAEL_context); return _gcry_selftest_helper_cfb("AES", &rijndael_setkey, &rijndael_encrypt, &_gcry_aes_cfb_dec, nblocks, blocksize, context_size); } /* Run all the self-tests and return NULL on success. This function is used for the on-the-fly self-tests. */ static const char * selftest (void) { const char *r; if ( (r = selftest_basic_128 ()) || (r = selftest_basic_192 ()) || (r = selftest_basic_256 ()) ) return r; if ( (r = selftest_ctr_128 ()) ) return r; if ( (r = selftest_cbc_128 ()) ) return r; if ( (r = selftest_cfb_128 ()) ) return r; return r; } /* SP800-38a.pdf for AES-128. */ static const char * selftest_fips_128_38a (int requested_mode) { static const struct tv { int mode; const unsigned char key[16]; const unsigned char iv[16]; struct { const unsigned char input[16]; const unsigned char output[16]; } data[4]; } tv[2] = { { GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } } }, { GCRY_CIPHER_MODE_OFB, { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, { { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, } } }; unsigned char scratch[16]; gpg_error_t err; int tvi, idx; gcry_cipher_hd_t hdenc = NULL; gcry_cipher_hd_t hddec = NULL; #define Fail(a) do { \ _gcry_cipher_close (hdenc); \ _gcry_cipher_close (hddec); \ return a; \ } while (0) gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); for (tvi=0; tvi < DIM (tv); tvi++) if (tv[tvi].mode == requested_mode) break; if (tvi == DIM (tv)) Fail ("no test data for this mode"); err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); if (err) Fail ("open"); err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); if (!err) err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); if (err) Fail ("set key"); err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); if (!err) err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); if (err) Fail ("set IV"); for (idx=0; idx < DIM (tv[tvi].data); idx++) { err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, tv[tvi].data[idx].input, sizeof tv[tvi].data[idx].input); if (err) Fail ("encrypt command"); if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) Fail ("encrypt mismatch"); err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, tv[tvi].data[idx].output, sizeof tv[tvi].data[idx].output); if (err) Fail ("decrypt command"); if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) Fail ("decrypt mismatch"); } #undef Fail _gcry_cipher_close (hdenc); _gcry_cipher_close (hddec); return NULL; } /* Complete selftest for AES-128 with all modes and driver code. */ static gpg_err_code_t selftest_fips_128 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "low-level"; errtxt = selftest_basic_128 (); if (errtxt) goto failed; if (extended) { what = "cfb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); if (errtxt) goto failed; what = "ofb"; errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES128, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-192. */ static gpg_err_code_t selftest_fips_192 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_192 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES192, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Complete selftest for AES-256. */ static gpg_err_code_t selftest_fips_256 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; (void)extended; /* No extended tests available. */ what = "low-level"; errtxt = selftest_basic_256 (); if (errtxt) goto failed; return 0; /* Succeeded. */ failed: if (report) report ("cipher", GCRY_CIPHER_AES256, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. */ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_AES128: ec = selftest_fips_128 (extended, report); break; case GCRY_CIPHER_AES192: ec = selftest_fips_192 (extended, report); break; case GCRY_CIPHER_AES256: ec = selftest_fips_256 (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } static const char *rijndael_names[] = { "RIJNDAEL", "AES128", "AES-128", NULL }; static gcry_cipher_oid_spec_t rijndael_oids[] = { { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes = { GCRY_CIPHER_AES, {0, 1}, "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael192_names[] = { "RIJNDAEL192", "AES-192", NULL }; static gcry_cipher_oid_spec_t rijndael192_oids[] = { { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes192 = { GCRY_CIPHER_AES192, {0, 1}, "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael256_names[] = { "RIJNDAEL256", "AES-256", NULL }; static gcry_cipher_oid_spec_t rijndael256_oids[] = { { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes256 = { GCRY_CIPHER_AES256, {0, 1}, "AES256", rijndael256_names, rijndael256_oids, 16, 256, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; diff --git a/src/cipher.h b/src/cipher.h index a6f257df..7c2e5d9e 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -1,345 +1,348 @@ /* cipher.h * Copyright (C) 1998, 2002, 2003, 2009 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #ifndef G10_CIPHER_H #define G10_CIPHER_H #include "gcrypt-int.h" #define DBG_CIPHER _gcry_get_debug_flag( 1 ) #include "../random/random.h" #define PUBKEY_FLAG_NO_BLINDING (1 << 0) #define PUBKEY_FLAG_RFC6979 (1 << 1) #define PUBKEY_FLAG_FIXEDLEN (1 << 2) #define PUBKEY_FLAG_LEGACYRESULT (1 << 3) #define PUBKEY_FLAG_RAW_FLAG (1 << 4) #define PUBKEY_FLAG_TRANSIENT_KEY (1 << 5) #define PUBKEY_FLAG_USE_X931 (1 << 6) #define PUBKEY_FLAG_USE_FIPS186 (1 << 7) #define PUBKEY_FLAG_USE_FIPS186_2 (1 << 8) #define PUBKEY_FLAG_PARAM (1 << 9) #define PUBKEY_FLAG_COMP (1 << 10) #define PUBKEY_FLAG_NOCOMP (1 << 11) #define PUBKEY_FLAG_EDDSA (1 << 12) #define PUBKEY_FLAG_GOST (1 << 13) #define PUBKEY_FLAG_NO_KEYTEST (1 << 14) #define PUBKEY_FLAG_DJB_TWEAK (1 << 15) enum pk_operation { PUBKEY_OP_ENCRYPT, PUBKEY_OP_DECRYPT, PUBKEY_OP_SIGN, PUBKEY_OP_VERIFY }; enum pk_encoding { PUBKEY_ENC_RAW, PUBKEY_ENC_PKCS1, PUBKEY_ENC_PKCS1_RAW, PUBKEY_ENC_OAEP, PUBKEY_ENC_PSS, PUBKEY_ENC_UNKNOWN }; struct pk_encoding_ctx { enum pk_operation op; unsigned int nbits; enum pk_encoding encoding; int flags; int hash_algo; /* for OAEP */ unsigned char *label; size_t labellen; /* for PSS */ size_t saltlen; int (* verify_cmp) (void *opaque, gcry_mpi_t tmp); void *verify_arg; }; #define CIPHER_INFO_NO_WEAK_KEY 1 #include "cipher-proto.h" /* The internal encryption modes. */ enum gcry_cipher_internal_modes { GCRY_CIPHER_MODE_INTERNAL = 0x10000, GCRY_CIPHER_MODE_CMAC = 0x10000 + 1 /* Cipher-based MAC. */ }; /*-- cipher.c --*/ gcry_err_code_t _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, int algo, int mode, unsigned int flags); /*-- cipher-cmac.c --*/ gcry_err_code_t _gcry_cipher_cmac_authenticate /* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); gcry_err_code_t _gcry_cipher_cmac_get_tag /* */ (gcry_cipher_hd_t c, unsigned char *outtag, size_t taglen); gcry_err_code_t _gcry_cipher_cmac_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); gcry_err_code_t _gcry_cipher_cmac_set_subkeys /* */ (gcry_cipher_hd_t c); /*-- rmd160.c --*/ void _gcry_rmd160_hash_buffer (void *outbuf, const void *buffer, size_t length); /*-- sha1.c --*/ void _gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length); void _gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt); /*-- sha256.c --*/ void _gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length); void _gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt); /*-- sha512.c --*/ void _gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length); void _gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt); /*-- sm3.c --*/ void _gcry_sm3_hash_buffer (void *outbuf, const void *buffer, size_t length); void _gcry_sm3_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt); /*-- blake2.c --*/ gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags, const unsigned char *key, size_t keylen, int algo); /*-- rijndael.c --*/ void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks); void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); +void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt); /*-- blowfish.c --*/ void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_blowfish_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_blowfish_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); /*-- cast5.c --*/ void _gcry_cast5_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_cast5_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_cast5_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); /*-- camellia-glue.c --*/ void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_camellia_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_camellia_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); /*-- des.c --*/ void _gcry_3des_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_3des_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_3des_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); /*-- serpent.c --*/ void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); /*-- twofish.c --*/ void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_twofish_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); void _gcry_twofish_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); /*-- dsa.c --*/ void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data); /*-- elgamal.c --*/ void _gcry_register_pk_elg_progress (gcry_handler_progress_t cb, void *cb_data); /*-- ecc.c --*/ void _gcry_register_pk_ecc_progress (gcry_handler_progress_t cbc, void *cb_data); /*-- primegen.c --*/ void _gcry_register_primegen_progress (gcry_handler_progress_t cb, void *cb_data); /*-- pubkey.c --*/ /* Declarations for the cipher specifications. */ extern gcry_cipher_spec_t _gcry_cipher_spec_blowfish; extern gcry_cipher_spec_t _gcry_cipher_spec_des; extern gcry_cipher_spec_t _gcry_cipher_spec_tripledes; extern gcry_cipher_spec_t _gcry_cipher_spec_arcfour; extern gcry_cipher_spec_t _gcry_cipher_spec_cast5; extern gcry_cipher_spec_t _gcry_cipher_spec_aes; extern gcry_cipher_spec_t _gcry_cipher_spec_aes192; extern gcry_cipher_spec_t _gcry_cipher_spec_aes256; extern gcry_cipher_spec_t _gcry_cipher_spec_twofish; extern gcry_cipher_spec_t _gcry_cipher_spec_twofish128; extern gcry_cipher_spec_t _gcry_cipher_spec_serpent128; extern gcry_cipher_spec_t _gcry_cipher_spec_serpent192; extern gcry_cipher_spec_t _gcry_cipher_spec_serpent256; extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40; extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128; extern gcry_cipher_spec_t _gcry_cipher_spec_seed; extern gcry_cipher_spec_t _gcry_cipher_spec_camellia128; extern gcry_cipher_spec_t _gcry_cipher_spec_camellia192; extern gcry_cipher_spec_t _gcry_cipher_spec_camellia256; extern gcry_cipher_spec_t _gcry_cipher_spec_idea; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12; extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147; extern gcry_cipher_spec_t _gcry_cipher_spec_chacha20; /* Declarations for the digest specifications. */ extern gcry_md_spec_t _gcry_digest_spec_crc32; extern gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510; extern gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440; extern gcry_md_spec_t _gcry_digest_spec_gost3411_94; extern gcry_md_spec_t _gcry_digest_spec_gost3411_cp; extern gcry_md_spec_t _gcry_digest_spec_stribog_256; extern gcry_md_spec_t _gcry_digest_spec_stribog_512; extern gcry_md_spec_t _gcry_digest_spec_md2; extern gcry_md_spec_t _gcry_digest_spec_md4; extern gcry_md_spec_t _gcry_digest_spec_md5; extern gcry_md_spec_t _gcry_digest_spec_rmd160; extern gcry_md_spec_t _gcry_digest_spec_sha1; extern gcry_md_spec_t _gcry_digest_spec_sha224; extern gcry_md_spec_t _gcry_digest_spec_sha256; extern gcry_md_spec_t _gcry_digest_spec_sha384; extern gcry_md_spec_t _gcry_digest_spec_sha512; extern gcry_md_spec_t _gcry_digest_spec_sha3_224; extern gcry_md_spec_t _gcry_digest_spec_sha3_256; extern gcry_md_spec_t _gcry_digest_spec_sha3_512; extern gcry_md_spec_t _gcry_digest_spec_sha3_384; extern gcry_md_spec_t _gcry_digest_spec_shake128; extern gcry_md_spec_t _gcry_digest_spec_shake256; extern gcry_md_spec_t _gcry_digest_spec_tiger; extern gcry_md_spec_t _gcry_digest_spec_tiger1; extern gcry_md_spec_t _gcry_digest_spec_tiger2; extern gcry_md_spec_t _gcry_digest_spec_whirlpool; extern gcry_md_spec_t _gcry_digest_spec_blake2b_512; extern gcry_md_spec_t _gcry_digest_spec_blake2b_384; extern gcry_md_spec_t _gcry_digest_spec_blake2b_256; extern gcry_md_spec_t _gcry_digest_spec_blake2b_160; extern gcry_md_spec_t _gcry_digest_spec_blake2s_256; extern gcry_md_spec_t _gcry_digest_spec_blake2s_224; extern gcry_md_spec_t _gcry_digest_spec_blake2s_160; extern gcry_md_spec_t _gcry_digest_spec_blake2s_128; extern gcry_md_spec_t _gcry_digest_spec_sm3; /* Declarations for the pubkey cipher specifications. */ extern gcry_pk_spec_t _gcry_pubkey_spec_rsa; extern gcry_pk_spec_t _gcry_pubkey_spec_elg; extern gcry_pk_spec_t _gcry_pubkey_spec_elg_e; extern gcry_pk_spec_t _gcry_pubkey_spec_dsa; extern gcry_pk_spec_t _gcry_pubkey_spec_ecc; #endif /*G10_CIPHER_H*/