diff --git a/cipher/idea.c b/cipher/idea.c index 0a810818..7f706660 100644 --- a/cipher/idea.c +++ b/cipher/idea.c @@ -1,382 +1,384 @@ /* idea.c - IDEA function * Copyright 1997, 1998, 1999, 2001 Werner Koch (dd9jn) * Copyright 2013 g10 Code GmbH * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * WERNER KOCH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Except as contained in this notice, the name of Werner Koch shall not be * used in advertising or otherwise to promote the sale, use or other dealings * in this Software without prior written authorization from Werner Koch. * * Patents on IDEA have expired: * Europe: EP0482154 on 2011-05-16, * Japan: JP3225440 on 2011-05-16, * U.S.: 5,214,703 on 2012-01-07. */ /* * Please see http://www.noepatents.org/ to learn why software patents * are bad for society and what you can do to fight them. * * The code herein is based on the one from: * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. * ISBN 0-471-11709-9. */ #include #include #include #include #include #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "cipher-internal.h" #define IDEA_KEYSIZE 16 #define IDEA_BLOCKSIZE 8 #define IDEA_ROUNDS 8 #define IDEA_KEYLEN (6*IDEA_ROUNDS+4) typedef struct { u16 ek[IDEA_KEYLEN]; u16 dk[IDEA_KEYLEN]; int have_dk; } IDEA_context; static const char *selftest(void); static u16 mul_inv( u16 x ) { u16 t0, t1; u16 q, y; if( x < 2 ) return x; t1 = 0x10001UL / x; y = 0x10001UL % x; if( y == 1 ) return (1-t1) & 0xffff; t0 = 1; do { q = x / y; x = x % y; t0 += q * t1; if( x == 1 ) return t0; q = y / x; y = y % x; t1 += q * t0; } while( y != 1 ); return (1-t1) & 0xffff; } static void expand_key( const byte *userkey, u16 *ek ) { int i,j; for(j=0; j < 8; j++ ) { ek[j] = (*userkey << 8) + userkey[1]; userkey += 2; } for(i=0; j < IDEA_KEYLEN; j++ ) { i++; ek[i+7] = ek[i&7] << 9 | ek[(i+1)&7] >> 7; ek += i & 8; i &= 7; } } static void invert_key( u16 *ek, u16 dk[IDEA_KEYLEN] ) { int i; u16 t1, t2, t3; u16 temp[IDEA_KEYLEN]; u16 *p = temp + IDEA_KEYLEN; t1 = mul_inv( *ek++ ); t2 = -*ek++; t3 = -*ek++; *--p = mul_inv( *ek++ ); *--p = t3; *--p = t2; *--p = t1; for(i=0; i < IDEA_ROUNDS-1; i++ ) { t1 = *ek++; *--p = *ek++; *--p = t1; t1 = mul_inv( *ek++ ); t2 = -*ek++; t3 = -*ek++; *--p = mul_inv( *ek++ ); *--p = t2; *--p = t3; *--p = t1; } t1 = *ek++; *--p = *ek++; *--p = t1; t1 = mul_inv( *ek++ ); t2 = -*ek++; t3 = -*ek++; *--p = mul_inv( *ek++ ); *--p = t3; *--p = t2; *--p = t1; memcpy(dk, temp, sizeof(temp) ); wipememory(temp, sizeof(temp)); } static void cipher( byte *outbuf, const byte *inbuf, u16 *key ) { u16 s2, s3; u16 in[4]; int r = IDEA_ROUNDS; #define x1 (in[0]) #define x2 (in[1]) #define x3 (in[2]) #define x4 (in[3]) #define MUL(x,y) \ do {u16 _t16; u32 _t32; \ if( (_t16 = (y)) ) { \ if( (x = (x)&0xffff) ) { \ _t32 = (u32)x * _t16; \ x = _t32 & 0xffff; \ _t16 = _t32 >> 16; \ x = ((x)-_t16) + (x<_t16?1:0); \ } \ else { \ x = 1 - _t16; \ } \ } \ else { \ x = 1 - x; \ } \ } while(0) memcpy (in, inbuf, sizeof in); #ifndef WORDS_BIGENDIAN x1 = (x1>>8) | (x1<<8); x2 = (x2>>8) | (x2<<8); x3 = (x3>>8) | (x3<<8); x4 = (x4>>8) | (x4<<8); #endif do { MUL(x1, *key++); x2 += *key++; x3 += *key++; MUL(x4, *key++ ); s3 = x3; x3 ^= x1; MUL(x3, *key++); s2 = x2; x2 ^=x4; x2 += x3; MUL(x2, *key++); x3 += x2; x1 ^= x2; x4 ^= x3; x2 ^= s3; x3 ^= s2; } while( --r ); MUL(x1, *key++); x3 += *key++; x2 += *key++; MUL(x4, *key); #ifndef WORDS_BIGENDIAN x1 = (x1>>8) | (x1<<8); x2 = (x2>>8) | (x2<<8); x3 = (x3>>8) | (x3<<8); x4 = (x4>>8) | (x4<<8); #endif memcpy (outbuf+0, &x1, 2); memcpy (outbuf+2, &x3, 2); memcpy (outbuf+4, &x2, 2); memcpy (outbuf+6, &x4, 2); #undef MUL #undef x1 #undef x2 #undef x3 #undef x4 } static int do_setkey( IDEA_context *c, const byte *key, unsigned int keylen ) { static int initialized = 0; static const char *selftest_failed = 0; if( !initialized ) { initialized = 1; selftest_failed = selftest(); if( selftest_failed ) log_error( "%s\n", selftest_failed ); } if( selftest_failed ) return GPG_ERR_SELFTEST_FAILED; - assert(keylen == 16); + if (keylen != 16) + return GPG_ERR_INV_KEYLEN; + c->have_dk = 0; expand_key( key, c->ek ); invert_key( c->ek, c->dk ); return 0; } static gcry_err_code_t idea_setkey (void *context, const byte *key, unsigned int keylen, cipher_bulk_ops_t *bulk_ops) { IDEA_context *ctx = context; int rc = do_setkey (ctx, key, keylen); (void)bulk_ops; _gcry_burn_stack (23+6*sizeof(void*)); return rc; } static void encrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) { cipher( outbuf, inbuf, c->ek ); } static unsigned int idea_encrypt (void *context, byte *out, const byte *in) { IDEA_context *ctx = context; encrypt_block (ctx, out, in); return /*burn_stack*/ (24+3*sizeof (void*)); } static void decrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) { if( !c->have_dk ) { c->have_dk = 1; invert_key( c->ek, c->dk ); } cipher( outbuf, inbuf, c->dk ); } static unsigned int idea_decrypt (void *context, byte *out, const byte *in) { IDEA_context *ctx = context; decrypt_block (ctx, out, in); return /*burn_stack*/ (24+3*sizeof (void*)); } static const char * selftest( void ) { static struct { byte key[16]; byte plain[8]; byte cipher[8]; } test_vectors[] = { { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 }, { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, { 0x54, 0x0E, 0x5F, 0xEA, 0x18, 0xC2, 0xF8, 0xB1 } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0x00, 0x19, 0x32, 0x4B, 0x64, 0x7D, 0x96, 0xAF }, { 0x9F, 0x0A, 0x0A, 0xB6, 0xE1, 0x0C, 0xED, 0x78 } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0xF5, 0x20, 0x2D, 0x5B, 0x9C, 0x67, 0x1B, 0x08 }, { 0xCF, 0x18, 0xFD, 0x73, 0x55, 0xE2, 0xC5, 0xC5 } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0xFA, 0xE6, 0xD2, 0xBE, 0xAA, 0x96, 0x82, 0x6E }, { 0x85, 0xDF, 0x52, 0x00, 0x56, 0x08, 0x19, 0x3D } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0x0A, 0x14, 0x1E, 0x28, 0x32, 0x3C, 0x46, 0x50 }, { 0x2F, 0x7D, 0xE7, 0x50, 0x21, 0x2F, 0xB7, 0x34 } }, { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, { 0x05, 0x0A, 0x0F, 0x14, 0x19, 0x1E, 0x23, 0x28 }, { 0x7B, 0x73, 0x14, 0x92, 0x5D, 0xE5, 0x9C, 0x09 } }, { { 0x00, 0x05, 0x00, 0x0A, 0x00, 0x0F, 0x00, 0x14, 0x00, 0x19, 0x00, 0x1E, 0x00, 0x23, 0x00, 0x28 }, { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, { 0x3E, 0xC0, 0x47, 0x80, 0xBE, 0xFF, 0x6E, 0x20 } }, { { 0x3A, 0x98, 0x4E, 0x20, 0x00, 0x19, 0x5D, 0xB3, 0x2E, 0xE5, 0x01, 0xC8, 0xC4, 0x7C, 0xEA, 0x60 }, { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, { 0x97, 0xBC, 0xD8, 0x20, 0x07, 0x80, 0xDA, 0x86 } }, { { 0x00, 0x64, 0x00, 0xC8, 0x01, 0x2C, 0x01, 0x90, 0x01, 0xF4, 0x02, 0x58, 0x02, 0xBC, 0x03, 0x20 }, { 0x05, 0x32, 0x0A, 0x64, 0x14, 0xC8, 0x19, 0xFA }, { 0x65, 0xBE, 0x87, 0xE7, 0xA2, 0x53, 0x8A, 0xED } }, { { 0x9D, 0x40, 0x75, 0xC1, 0x03, 0xBC, 0x32, 0x2A, 0xFB, 0x03, 0xE7, 0xBE, 0x6A, 0xB3, 0x00, 0x06 }, { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }, { 0xF5, 0xDB, 0x1A, 0xC4, 0x5E, 0x5E, 0xF9, 0xF9 } } }; IDEA_context c; byte buffer[8]; int i; for(i=0; i < DIM(test_vectors); i++ ) { do_setkey( &c, test_vectors[i].key, 16 ); encrypt_block( &c, buffer, test_vectors[i].plain ); if( memcmp( buffer, test_vectors[i].cipher, 8 ) ) return "IDEA test encryption failed."; decrypt_block( &c, buffer, test_vectors[i].cipher ); if( memcmp( buffer, test_vectors[i].plain, 8 ) ) return "IDEA test decryption failed."; } return NULL; } gcry_cipher_spec_t _gcry_cipher_spec_idea = { GCRY_CIPHER_IDEA, {0, 0}, "IDEA", NULL, NULL, IDEA_BLOCKSIZE, 128, sizeof (IDEA_context), idea_setkey, idea_encrypt, idea_decrypt }; diff --git a/cipher/rfc2268.c b/cipher/rfc2268.c index f018b640..b093f022 100644 --- a/cipher/rfc2268.c +++ b/cipher/rfc2268.c @@ -1,378 +1,381 @@ /* rfc2268.c - The cipher described in rfc2268; aka Ron's Cipher 2. * Copyright (C) 2003 Nikos Mavroyanopoulos * Copyright (C) 2004 Free Software Foundation, Inc. * * This file is part of Libgcrypt * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ /* This implementation was written by Nikos Mavroyanopoulos for GNUTLS * as a Libgcrypt module (gnutls/lib/x509/rc2.c) and later adapted for * direct use by Libgcrypt by Werner Koch. This implementation is * only useful for pkcs#12 decryption. * * The implementation here is based on Peter Gutmann's RRC.2 paper. */ #include #include #include #include #include "g10lib.h" #include "types.h" #include "cipher.h" #include "cipher-internal.h" #define RFC2268_BLOCKSIZE 8 typedef struct { u16 S[64]; } RFC2268_context; static const unsigned char rfc2268_sbox[] = { 217, 120, 249, 196, 25, 221, 181, 237, 40, 233, 253, 121, 74, 160, 216, 157, 198, 126, 55, 131, 43, 118, 83, 142, 98, 76, 100, 136, 68, 139, 251, 162, 23, 154, 89, 245, 135, 179, 79, 19, 97, 69, 109, 141, 9, 129, 125, 50, 189, 143, 64, 235, 134, 183, 123, 11, 240, 149, 33, 34, 92, 107, 78, 130, 84, 214, 101, 147, 206, 96, 178, 28, 115, 86, 192, 20, 167, 140, 241, 220, 18, 117, 202, 31, 59, 190, 228, 209, 66, 61, 212, 48, 163, 60, 182, 38, 111, 191, 14, 218, 70, 105, 7, 87, 39, 242, 29, 155, 188, 148, 67, 3, 248, 17, 199, 246, 144, 239, 62, 231, 6, 195, 213, 47, 200, 102, 30, 215, 8, 232, 234, 222, 128, 82, 238, 247, 132, 170, 114, 172, 53, 77, 106, 42, 150, 26, 210, 113, 90, 21, 73, 116, 75, 159, 208, 94, 4, 24, 164, 236, 194, 224, 65, 110, 15, 81, 203, 204, 36, 145, 175, 80, 161, 244, 112, 57, 153, 124, 58, 133, 35, 184, 180, 122, 252, 2, 54, 91, 37, 85, 151, 49, 45, 93, 250, 152, 227, 138, 146, 174, 5, 223, 41, 16, 103, 108, 186, 201, 211, 0, 230, 207, 225, 158, 168, 44, 99, 22, 1, 63, 88, 226, 137, 169, 13, 56, 52, 27, 171, 51, 255, 176, 187, 72, 12, 95, 185, 177, 205, 46, 197, 243, 219, 71, 229, 165, 156, 119, 10, 166, 32, 104, 254, 127, 193, 173 }; #define rotl16(x,n) (((x) << ((u16)(n))) | ((x) >> (16 - (u16)(n)))) #define rotr16(x,n) (((x) >> ((u16)(n))) | ((x) << (16 - (u16)(n)))) static const char *selftest (void); static void do_encrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf) { RFC2268_context *ctx = context; register int i, j; u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0; word0 = (word0 << 8) | inbuf[1]; word0 = (word0 << 8) | inbuf[0]; word1 = (word1 << 8) | inbuf[3]; word1 = (word1 << 8) | inbuf[2]; word2 = (word2 << 8) | inbuf[5]; word2 = (word2 << 8) | inbuf[4]; word3 = (word3 << 8) | inbuf[7]; word3 = (word3 << 8) | inbuf[6]; for (i = 0; i < 16; i++) { j = i * 4; /* For some reason I cannot combine those steps. */ word0 += (word1 & ~word3) + (word2 & word3) + ctx->S[j]; word0 = rotl16(word0, 1); word1 += (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1]; word1 = rotl16(word1, 2); word2 += (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2]; word2 = rotl16(word2, 3); word3 += (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3]; word3 = rotl16(word3, 5); if (i == 4 || i == 10) { word0 += ctx->S[word3 & 63]; word1 += ctx->S[word0 & 63]; word2 += ctx->S[word1 & 63]; word3 += ctx->S[word2 & 63]; } } outbuf[0] = word0 & 255; outbuf[1] = word0 >> 8; outbuf[2] = word1 & 255; outbuf[3] = word1 >> 8; outbuf[4] = word2 & 255; outbuf[5] = word2 >> 8; outbuf[6] = word3 & 255; outbuf[7] = word3 >> 8; } static unsigned int encrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf) { do_encrypt (context, outbuf, inbuf); return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4); } static void do_decrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf) { RFC2268_context *ctx = context; register int i, j; u16 word0 = 0, word1 = 0, word2 = 0, word3 = 0; word0 = (word0 << 8) | inbuf[1]; word0 = (word0 << 8) | inbuf[0]; word1 = (word1 << 8) | inbuf[3]; word1 = (word1 << 8) | inbuf[2]; word2 = (word2 << 8) | inbuf[5]; word2 = (word2 << 8) | inbuf[4]; word3 = (word3 << 8) | inbuf[7]; word3 = (word3 << 8) | inbuf[6]; for (i = 15; i >= 0; i--) { j = i * 4; word3 = rotr16(word3, 5); word3 -= (word0 & ~word2) + (word1 & word2) + ctx->S[j + 3]; word2 = rotr16(word2, 3); word2 -= (word3 & ~word1) + (word0 & word1) + ctx->S[j + 2]; word1 = rotr16(word1, 2); word1 -= (word2 & ~word0) + (word3 & word0) + ctx->S[j + 1]; word0 = rotr16(word0, 1); word0 -= (word1 & ~word3) + (word2 & word3) + ctx->S[j]; if (i == 5 || i == 11) { word3 = word3 - ctx->S[word2 & 63]; word2 = word2 - ctx->S[word1 & 63]; word1 = word1 - ctx->S[word0 & 63]; word0 = word0 - ctx->S[word3 & 63]; } } outbuf[0] = word0 & 255; outbuf[1] = word0 >> 8; outbuf[2] = word1 & 255; outbuf[3] = word1 >> 8; outbuf[4] = word2 & 255; outbuf[5] = word2 >> 8; outbuf[6] = word3 & 255; outbuf[7] = word3 >> 8; } static unsigned int decrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf) { do_decrypt (context, outbuf, inbuf); return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4); } static gpg_err_code_t setkey_core (void *context, const unsigned char *key, unsigned int keylen, int with_phase2) { static int initialized; static const char *selftest_failed; RFC2268_context *ctx = context; unsigned int i; unsigned char *S, x; int len; int bits = keylen * 8; if (!initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("RFC2268 selftest failed (%s).\n", selftest_failed); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if (keylen < 40 / 8) /* We want at least 40 bits. */ return GPG_ERR_INV_KEYLEN; + if (keylen > 128) + return GPG_ERR_INV_KEYLEN; + S = (unsigned char *) ctx->S; for (i = 0; i < keylen; i++) S[i] = key[i]; for (i = keylen; i < 128; i++) S[i] = rfc2268_sbox[(S[i - keylen] + S[i - 1]) & 255]; S[0] = rfc2268_sbox[S[0]]; /* Phase 2 - reduce effective key size to "bits". This was not * discussed in Gutmann's paper. I've copied that from the public * domain code posted in sci.crypt. */ if (with_phase2) { len = (bits + 7) >> 3; i = 128 - len; x = rfc2268_sbox[S[i] & (255 >> (7 & -bits))]; S[i] = x; while (i--) { x = rfc2268_sbox[x ^ S[i + len]]; S[i] = x; } } /* Make the expanded key, endian independent. */ for (i = 0; i < 64; i++) ctx->S[i] = ( (u16) S[i * 2] | (((u16) S[i * 2 + 1]) << 8)); return 0; } static gpg_err_code_t do_setkey (void *context, const unsigned char *key, unsigned int keylen, cipher_bulk_ops_t *bulk_ops) { (void)bulk_ops; return setkey_core (context, key, keylen, 1); } static const char * selftest (void) { RFC2268_context ctx; unsigned char scratch[16]; /* Test vectors from Peter Gutmann's paper. */ static unsigned char key_1[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static unsigned char plaintext_1[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const unsigned char ciphertext_1[] = { 0x1C, 0x19, 0x8A, 0x83, 0x8D, 0xF0, 0x28, 0xB7 }; static unsigned char key_2[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F }; static unsigned char plaintext_2[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static unsigned char ciphertext_2[] = { 0x50, 0xDC, 0x01, 0x62, 0xBD, 0x75, 0x7F, 0x31 }; /* This one was checked against libmcrypt's RFC2268. */ static unsigned char key_3[] = { 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static unsigned char plaintext_3[] = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static unsigned char ciphertext_3[] = { 0x8f, 0xd1, 0x03, 0x89, 0x33, 0x6b, 0xf9, 0x5e }; /* First test. */ setkey_core (&ctx, key_1, sizeof(key_1), 0); do_encrypt (&ctx, scratch, plaintext_1); if (memcmp (scratch, ciphertext_1, sizeof(ciphertext_1))) return "RFC2268 encryption test 1 failed."; setkey_core (&ctx, key_1, sizeof(key_1), 0); do_decrypt (&ctx, scratch, scratch); if (memcmp (scratch, plaintext_1, sizeof(plaintext_1))) return "RFC2268 decryption test 1 failed."; /* Second test. */ setkey_core (&ctx, key_2, sizeof(key_2), 0); do_encrypt (&ctx, scratch, plaintext_2); if (memcmp (scratch, ciphertext_2, sizeof(ciphertext_2))) return "RFC2268 encryption test 2 failed."; setkey_core (&ctx, key_2, sizeof(key_2), 0); do_decrypt (&ctx, scratch, scratch); if (memcmp (scratch, plaintext_2, sizeof(plaintext_2))) return "RFC2268 decryption test 2 failed."; /* Third test. */ setkey_core(&ctx, key_3, sizeof(key_3), 0); do_encrypt(&ctx, scratch, plaintext_3); if (memcmp(scratch, ciphertext_3, sizeof(ciphertext_3))) return "RFC2268 encryption test 3 failed."; setkey_core (&ctx, key_3, sizeof(key_3), 0); do_decrypt (&ctx, scratch, scratch); if (memcmp(scratch, plaintext_3, sizeof(plaintext_3))) return "RFC2268 decryption test 3 failed."; return NULL; } static gcry_cipher_oid_spec_t oids_rfc2268_40[] = { /*{ "1.2.840.113549.3.2", GCRY_CIPHER_MODE_CBC },*/ /* pbeWithSHAAnd40BitRC2_CBC */ { "1.2.840.113549.1.12.1.6", GCRY_CIPHER_MODE_CBC }, { NULL } }; static gcry_cipher_oid_spec_t oids_rfc2268_128[] = { /* pbeWithSHAAnd128BitRC2_CBC */ { "1.2.840.113549.1.12.1.5", GCRY_CIPHER_MODE_CBC }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 = { GCRY_CIPHER_RFC2268_40, {0, 0}, "RFC2268_40", NULL, oids_rfc2268_40, RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context), do_setkey, encrypt_block, decrypt_block }; gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128 = { GCRY_CIPHER_RFC2268_128, {0, 0}, "RFC2268_128", NULL, oids_rfc2268_128, RFC2268_BLOCKSIZE, 128, sizeof(RFC2268_context), do_setkey, encrypt_block, decrypt_block }; diff --git a/cipher/serpent.c b/cipher/serpent.c index 3c5eed2c..d2f7f16e 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -1,1807 +1,1811 @@ /* serpent.c - Implementation of the Serpent encryption algorithm. * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bithelp.h" #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ #undef USE_SSE2 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSE2 1 #endif /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ #undef USE_AVX2 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # if defined(ENABLE_AVX2_SUPPORT) # define USE_AVX2 1 # endif #endif /* USE_NEON indicates whether to enable ARM NEON assembly code. */ #undef USE_NEON #ifdef ENABLE_NEON_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_NEON) # define USE_NEON 1 # endif #endif /*ENABLE_NEON_SUPPORT*/ /* Number of rounds per Serpent encrypt/decrypt operation. */ #define ROUNDS 32 /* Magic number, used during generating of the subkeys. */ #define PHI 0x9E3779B9 /* Serpent works on 128 bit blocks. */ typedef u32 serpent_block_t[4]; /* Serpent key, provided by the user. If the original key is shorter than 256 bits, it is padded. */ typedef u32 serpent_key_t[8]; /* The key schedule consists of 33 128 bit subkeys. */ typedef u32 serpent_subkeys_t[ROUNDS + 1][4]; /* A Serpent context. */ typedef struct serpent_context { serpent_subkeys_t keys; /* Generated subkeys. */ #ifdef USE_AVX2 int use_avx2; #endif #ifdef USE_NEON int use_neon; #endif } serpent_context_t; /* Assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. */ #undef ASM_FUNC_ABI #if defined(USE_SSE2) || defined(USE_AVX2) # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # else # define ASM_FUNC_ABI # endif #endif #ifdef USE_SSE2 /* Assembler implementations of Serpent using SSE2. Process 8 block in parallel. */ extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[8]) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[8]) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[8]) ASM_FUNC_ABI; #endif #ifdef USE_AVX2 /* Assembler implementations of Serpent using AVX2. Process 16 block in parallel. */ extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const u64 Ls[16]) ASM_FUNC_ABI; #endif #ifdef USE_NEON /* Assembler implementations of Serpent using ARM NEON. Process 8 block in parallel. */ extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *ctr); extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv); extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv); extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const void *Ls[8]); extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *offset, unsigned char *checksum, const void *Ls[8]); extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx, const unsigned char *abuf, unsigned char *offset, unsigned char *checksum, const void *Ls[8]); #endif /* Prototypes. */ static const char *serpent_test (void); static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); /* * These are the S-Boxes of Serpent from following research paper. * * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference, * (New York, New York, USA), p. 317–329, National Institute of Standards and * Technology, 2000. * * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf * */ #define SBOX0(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r3 ^= r0; r4 = r1; \ r1 &= r3; r4 ^= r2; \ r1 ^= r0; r0 |= r3; \ r0 ^= r4; r4 ^= r3; \ r3 ^= r2; r2 |= r1; \ r2 ^= r4; r4 = ~r4; \ r4 |= r1; r1 ^= r3; \ r1 ^= r4; r3 |= r0; \ r1 ^= r3; r4 ^= r3; \ \ w = r1; x = r4; y = r2; z = r0; \ } #define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r2 = ~r2; r4 = r1; \ r1 |= r0; r4 = ~r4; \ r1 ^= r2; r2 |= r4; \ r1 ^= r3; r0 ^= r4; \ r2 ^= r0; r0 &= r3; \ r4 ^= r0; r0 |= r1; \ r0 ^= r2; r3 ^= r4; \ r2 ^= r1; r3 ^= r0; \ r3 ^= r1; \ r2 &= r3; \ r4 ^= r2; \ \ w = r0; x = r4; y = r1; z = r3; \ } #define SBOX1(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r0 = ~r0; r2 = ~r2; \ r4 = r0; r0 &= r1; \ r2 ^= r0; r0 |= r3; \ r3 ^= r2; r1 ^= r0; \ r0 ^= r4; r4 |= r1; \ r1 ^= r3; r2 |= r0; \ r2 &= r4; r0 ^= r1; \ r1 &= r2; \ r1 ^= r0; r0 &= r2; \ r0 ^= r4; \ \ w = r2; x = r0; y = r3; z = r1; \ } #define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r1; r1 ^= r3; \ r3 &= r1; r4 ^= r2; \ r3 ^= r0; r0 |= r1; \ r2 ^= r3; r0 ^= r4; \ r0 |= r2; r1 ^= r3; \ r0 ^= r1; r1 |= r3; \ r1 ^= r0; r4 = ~r4; \ r4 ^= r1; r1 |= r0; \ r1 ^= r0; \ r1 |= r4; \ r3 ^= r1; \ \ w = r4; x = r0; y = r3; z = r2; \ } #define SBOX2(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r0; r0 &= r2; \ r0 ^= r3; r2 ^= r1; \ r2 ^= r0; r3 |= r4; \ r3 ^= r1; r4 ^= r2; \ r1 = r3; r3 |= r4; \ r3 ^= r0; r0 &= r1; \ r4 ^= r0; r1 ^= r3; \ r1 ^= r4; r4 = ~r4; \ \ w = r2; x = r3; y = r1; z = r4; \ } #define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r2 ^= r3; r3 ^= r0; \ r4 = r3; r3 &= r2; \ r3 ^= r1; r1 |= r2; \ r1 ^= r4; r4 &= r3; \ r2 ^= r3; r4 &= r0; \ r4 ^= r2; r2 &= r1; \ r2 |= r0; r3 = ~r3; \ r2 ^= r3; r0 ^= r3; \ r0 &= r1; r3 ^= r4; \ r3 ^= r0; \ \ w = r1; x = r4; y = r2; z = r3; \ } #define SBOX3(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r0; r0 |= r3; \ r3 ^= r1; r1 &= r4; \ r4 ^= r2; r2 ^= r3; \ r3 &= r0; r4 |= r1; \ r3 ^= r4; r0 ^= r1; \ r4 &= r0; r1 ^= r3; \ r4 ^= r2; r1 |= r0; \ r1 ^= r2; r0 ^= r3; \ r2 = r1; r1 |= r3; \ r1 ^= r0; \ \ w = r1; x = r2; y = r3; z = r4; \ } #define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r2; r2 ^= r1; \ r0 ^= r2; r4 &= r2; \ r4 ^= r0; r0 &= r1; \ r1 ^= r3; r3 |= r4; \ r2 ^= r3; r0 ^= r3; \ r1 ^= r4; r3 &= r2; \ r3 ^= r1; r1 ^= r0; \ r1 |= r2; r0 ^= r3; \ r1 ^= r4; \ r0 ^= r1; \ \ w = r2; x = r1; y = r3; z = r0; \ } #define SBOX4(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r1 ^= r3; r3 = ~r3; \ r2 ^= r3; r3 ^= r0; \ r4 = r1; r1 &= r3; \ r1 ^= r2; r4 ^= r3; \ r0 ^= r4; r2 &= r4; \ r2 ^= r0; r0 &= r1; \ r3 ^= r0; r4 |= r1; \ r4 ^= r0; r0 |= r3; \ r0 ^= r2; r2 &= r3; \ r0 = ~r0; r4 ^= r2; \ \ w = r1; x = r4; y = r0; z = r3; \ } #define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r2; r2 &= r3; \ r2 ^= r1; r1 |= r3; \ r1 &= r0; r4 ^= r2; \ r4 ^= r1; r1 &= r2; \ r0 = ~r0; r3 ^= r4; \ r1 ^= r3; r3 &= r0; \ r3 ^= r2; r0 ^= r1; \ r2 &= r0; r3 ^= r0; \ r2 ^= r4; \ r2 |= r3; r3 ^= r0; \ r2 ^= r1; \ \ w = r0; x = r3; y = r2; z = r4; \ } #define SBOX5(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r0 ^= r1; r1 ^= r3; \ r3 = ~r3; r4 = r1; \ r1 &= r0; r2 ^= r3; \ r1 ^= r2; r2 |= r4; \ r4 ^= r3; r3 &= r1; \ r3 ^= r0; r4 ^= r1; \ r4 ^= r2; r2 ^= r0; \ r0 &= r3; r2 = ~r2; \ r0 ^= r4; r4 |= r3; \ r2 ^= r4; \ \ w = r1; x = r3; y = r0; z = r2; \ } #define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r1 = ~r1; r4 = r3; \ r2 ^= r1; r3 |= r0; \ r3 ^= r2; r2 |= r1; \ r2 &= r0; r4 ^= r3; \ r2 ^= r4; r4 |= r0; \ r4 ^= r1; r1 &= r2; \ r1 ^= r3; r4 ^= r2; \ r3 &= r4; r4 ^= r1; \ r3 ^= r4; r4 = ~r4; \ r3 ^= r0; \ \ w = r1; x = r4; y = r3; z = r2; \ } #define SBOX6(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r2 = ~r2; r4 = r3; \ r3 &= r0; r0 ^= r4; \ r3 ^= r2; r2 |= r4; \ r1 ^= r3; r2 ^= r0; \ r0 |= r1; r2 ^= r1; \ r4 ^= r0; r0 |= r3; \ r0 ^= r2; r4 ^= r3; \ r4 ^= r0; r3 = ~r3; \ r2 &= r4; \ r2 ^= r3; \ \ w = r0; x = r1; y = r4; z = r2; \ } #define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r0 ^= r2; r4 = r2; \ r2 &= r0; r4 ^= r3; \ r2 = ~r2; r3 ^= r1; \ r2 ^= r3; r4 |= r0; \ r0 ^= r2; r3 ^= r4; \ r4 ^= r1; r1 &= r3; \ r1 ^= r0; r0 ^= r3; \ r0 |= r2; r3 ^= r1; \ r4 ^= r0; \ \ w = r1; x = r2; y = r4; z = r3; \ } #define SBOX7(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r1; r1 |= r2; \ r1 ^= r3; r4 ^= r2; \ r2 ^= r1; r3 |= r4; \ r3 &= r0; r4 ^= r2; \ r3 ^= r1; r1 |= r4; \ r1 ^= r0; r0 |= r4; \ r0 ^= r2; r1 ^= r4; \ r2 ^= r1; r1 &= r0; \ r1 ^= r4; r2 = ~r2; \ r2 |= r0; \ r4 ^= r2; \ \ w = r4; x = r3; y = r1; z = r0; \ } #define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \ { \ u32 r4; \ \ r4 = r2; r2 ^= r0; \ r0 &= r3; r4 |= r3; \ r2 = ~r2; r3 ^= r1; \ r1 |= r0; r0 ^= r2; \ r2 &= r4; r3 &= r4; \ r1 ^= r2; r2 ^= r0; \ r0 |= r2; r4 ^= r1; \ r0 ^= r3; r3 ^= r4; \ r4 |= r0; r3 ^= r2; \ r4 ^= r2; \ \ w = r3; x = r0; y = r1; z = r4; \ } /* XOR BLOCK1 into BLOCK0. */ #define BLOCK_XOR(block0, block1) \ { \ block0[0] ^= block1[0]; \ block0[1] ^= block1[1]; \ block0[2] ^= block1[2]; \ block0[3] ^= block1[3]; \ } /* Copy BLOCK_SRC to BLOCK_DST. */ #define BLOCK_COPY(block_dst, block_src) \ { \ block_dst[0] = block_src[0]; \ block_dst[1] = block_src[1]; \ block_dst[2] = block_src[2]; \ block_dst[3] = block_src[3]; \ } /* Apply SBOX number WHICH to to the block found in ARRAY0, writing the output to the block found in ARRAY1. */ #define SBOX(which, array0, array1) \ SBOX##which (array0[0], array0[1], array0[2], array0[3], \ array1[0], array1[1], array1[2], array1[3]); /* Apply inverse SBOX number WHICH to to the block found in ARRAY0, writing the output to the block found in ARRAY1. */ #define SBOX_INVERSE(which, array0, array1) \ SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3], \ array1[0], array1[1], array1[2], array1[3]); /* Apply the linear transformation to BLOCK. */ #define LINEAR_TRANSFORMATION(block) \ { \ block[0] = rol (block[0], 13); \ block[2] = rol (block[2], 3); \ block[1] = block[1] ^ block[0] ^ block[2]; \ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \ block[1] = rol (block[1], 1); \ block[3] = rol (block[3], 7); \ block[0] = block[0] ^ block[1] ^ block[3]; \ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \ block[0] = rol (block[0], 5); \ block[2] = rol (block[2], 22); \ } /* Apply the inverse linear transformation to BLOCK. */ #define LINEAR_TRANSFORMATION_INVERSE(block) \ { \ block[2] = ror (block[2], 22); \ block[0] = ror (block[0] , 5); \ block[2] = block[2] ^ block[3] ^ (block[1] << 7); \ block[0] = block[0] ^ block[1] ^ block[3]; \ block[3] = ror (block[3], 7); \ block[1] = ror (block[1], 1); \ block[3] = block[3] ^ block[2] ^ (block[0] << 3); \ block[1] = block[1] ^ block[0] ^ block[2]; \ block[2] = ror (block[2], 3); \ block[0] = ror (block[0], 13); \ } /* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. This macro increments `round'. */ #define ROUND(which, subkeys, block, block_tmp) \ { \ BLOCK_XOR (block, subkeys[round]); \ round++; \ SBOX (which, block, block_tmp); \ LINEAR_TRANSFORMATION (block_tmp); \ BLOCK_COPY (block, block_tmp); \ } /* Apply the last Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. The result will be stored in BLOCK_TMP. This macro increments `round'. */ #define ROUND_LAST(which, subkeys, block, block_tmp) \ { \ BLOCK_XOR (block, subkeys[round]); \ round++; \ SBOX (which, block, block_tmp); \ BLOCK_XOR (block_tmp, subkeys[round]); \ round++; \ } /* Apply an inverse Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. This macro increments `round'. */ #define ROUND_INVERSE(which, subkey, block, block_tmp) \ { \ LINEAR_TRANSFORMATION_INVERSE (block); \ SBOX_INVERSE (which, block, block_tmp); \ BLOCK_XOR (block_tmp, subkey[round]); \ round--; \ BLOCK_COPY (block, block_tmp); \ } /* Apply the first Serpent round to BLOCK, using the SBOX number WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage. The result will be stored in BLOCK_TMP. This macro increments `round'. */ #define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \ { \ BLOCK_XOR (block, subkeys[round]); \ round--; \ SBOX_INVERSE (which, block, block_tmp); \ BLOCK_XOR (block_tmp, subkeys[round]); \ round--; \ } /* Convert the user provided key KEY of KEY_LENGTH bytes into the internally used format. */ static void serpent_key_prepare (const byte *key, unsigned int key_length, serpent_key_t key_prepared) { int i; /* Copy key. */ key_length /= 4; for (i = 0; i < key_length; i++) key_prepared[i] = buf_get_le32 (key + i * 4); if (i < 8) { /* Key must be padded according to the Serpent specification. */ key_prepared[i] = 0x00000001; for (i++; i < 8; i++) key_prepared[i] = 0; } } /* Derive the 33 subkeys from KEY and store them in SUBKEYS. */ static void serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys) { u32 w[8]; /* The `prekey'. */ u32 ws[4]; u32 wt[4]; /* Initialize with key values. */ w[0] = key[0]; w[1] = key[1]; w[2] = key[2]; w[3] = key[3]; w[4] = key[4]; w[5] = key[5]; w[6] = key[6]; w[7] = key[7]; /* Expand to intermediate key using the affine recurrence. */ #define EXPAND_KEY4(wo, r) \ wo[0] = w[(r+0)%8] = \ rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \ wo[1] = w[(r+1)%8] = \ rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \ wo[2] = w[(r+2)%8] = \ rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \ wo[3] = w[(r+3)%8] = \ rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11); #define EXPAND_KEY(r) \ EXPAND_KEY4(ws, (r)); \ EXPAND_KEY4(wt, (r + 4)); /* Calculate subkeys via S-Boxes, in bitslice mode. */ EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]); EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]); EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]); EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]); EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]); EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]); EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]); EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]); EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]); EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]); EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]); EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]); EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]); EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]); EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]); EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]); EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]); wipememory (ws, sizeof (ws)); wipememory (wt, sizeof (wt)); wipememory (w, sizeof (w)); } /* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. */ -static void +static gcry_err_code_t serpent_setkey_internal (serpent_context_t *context, const byte *key, unsigned int key_length) { serpent_key_t key_prepared; + if (key_length > 32) + return GPG_ERR_INV_KEYLEN; + serpent_key_prepare (key, key_length, key_prepared); serpent_subkeys_generate (key_prepared, context->keys); #ifdef USE_AVX2 context->use_avx2 = 0; if ((_gcry_get_hw_features () & HWF_INTEL_AVX2)) { context->use_avx2 = 1; } #endif #ifdef USE_NEON context->use_neon = 0; if ((_gcry_get_hw_features () & HWF_ARM_NEON)) { context->use_neon = 1; } #endif wipememory (key_prepared, sizeof(key_prepared)); + return 0; } /* Initialize CTX with the key KEY of KEY_LENGTH bytes. */ static gcry_err_code_t serpent_setkey (void *ctx, const byte *key, unsigned int key_length, cipher_bulk_ops_t *bulk_ops) { serpent_context_t *context = ctx; static const char *serpent_test_ret; static int serpent_init_done; gcry_err_code_t ret = GPG_ERR_NO_ERROR; if (! serpent_init_done) { /* Execute a self-test the first time, Serpent is used. */ serpent_init_done = 1; serpent_test_ret = serpent_test (); if (serpent_test_ret) log_error ("Serpent test failure: %s\n", serpent_test_ret); } /* Setup bulk encryption routines. */ memset (bulk_ops, 0, sizeof(*bulk_ops)); bulk_ops->cbc_dec = _gcry_serpent_cbc_dec; bulk_ops->cfb_dec = _gcry_serpent_cfb_dec; bulk_ops->ctr_enc = _gcry_serpent_ctr_enc; bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt; bulk_ops->ocb_auth = _gcry_serpent_ocb_auth; if (serpent_test_ret) ret = GPG_ERR_SELFTEST_FAILED; else - serpent_setkey_internal (context, key, key_length); + ret = serpent_setkey_internal (context, key, key_length); return ret; } static void serpent_encrypt_internal (serpent_context_t *context, const byte *input, byte *output) { serpent_block_t b, b_next; int round = 0; b[0] = buf_get_le32 (input + 0); b[1] = buf_get_le32 (input + 4); b[2] = buf_get_le32 (input + 8); b[3] = buf_get_le32 (input + 12); ROUND (0, context->keys, b, b_next); ROUND (1, context->keys, b, b_next); ROUND (2, context->keys, b, b_next); ROUND (3, context->keys, b, b_next); ROUND (4, context->keys, b, b_next); ROUND (5, context->keys, b, b_next); ROUND (6, context->keys, b, b_next); ROUND (7, context->keys, b, b_next); ROUND (0, context->keys, b, b_next); ROUND (1, context->keys, b, b_next); ROUND (2, context->keys, b, b_next); ROUND (3, context->keys, b, b_next); ROUND (4, context->keys, b, b_next); ROUND (5, context->keys, b, b_next); ROUND (6, context->keys, b, b_next); ROUND (7, context->keys, b, b_next); ROUND (0, context->keys, b, b_next); ROUND (1, context->keys, b, b_next); ROUND (2, context->keys, b, b_next); ROUND (3, context->keys, b, b_next); ROUND (4, context->keys, b, b_next); ROUND (5, context->keys, b, b_next); ROUND (6, context->keys, b, b_next); ROUND (7, context->keys, b, b_next); ROUND (0, context->keys, b, b_next); ROUND (1, context->keys, b, b_next); ROUND (2, context->keys, b, b_next); ROUND (3, context->keys, b, b_next); ROUND (4, context->keys, b, b_next); ROUND (5, context->keys, b, b_next); ROUND (6, context->keys, b, b_next); ROUND_LAST (7, context->keys, b, b_next); buf_put_le32 (output + 0, b_next[0]); buf_put_le32 (output + 4, b_next[1]); buf_put_le32 (output + 8, b_next[2]); buf_put_le32 (output + 12, b_next[3]); } static void serpent_decrypt_internal (serpent_context_t *context, const byte *input, byte *output) { serpent_block_t b, b_next; int round = ROUNDS; b_next[0] = buf_get_le32 (input + 0); b_next[1] = buf_get_le32 (input + 4); b_next[2] = buf_get_le32 (input + 8); b_next[3] = buf_get_le32 (input + 12); ROUND_FIRST_INVERSE (7, context->keys, b_next, b); ROUND_INVERSE (6, context->keys, b, b_next); ROUND_INVERSE (5, context->keys, b, b_next); ROUND_INVERSE (4, context->keys, b, b_next); ROUND_INVERSE (3, context->keys, b, b_next); ROUND_INVERSE (2, context->keys, b, b_next); ROUND_INVERSE (1, context->keys, b, b_next); ROUND_INVERSE (0, context->keys, b, b_next); ROUND_INVERSE (7, context->keys, b, b_next); ROUND_INVERSE (6, context->keys, b, b_next); ROUND_INVERSE (5, context->keys, b, b_next); ROUND_INVERSE (4, context->keys, b, b_next); ROUND_INVERSE (3, context->keys, b, b_next); ROUND_INVERSE (2, context->keys, b, b_next); ROUND_INVERSE (1, context->keys, b, b_next); ROUND_INVERSE (0, context->keys, b, b_next); ROUND_INVERSE (7, context->keys, b, b_next); ROUND_INVERSE (6, context->keys, b, b_next); ROUND_INVERSE (5, context->keys, b, b_next); ROUND_INVERSE (4, context->keys, b, b_next); ROUND_INVERSE (3, context->keys, b, b_next); ROUND_INVERSE (2, context->keys, b, b_next); ROUND_INVERSE (1, context->keys, b, b_next); ROUND_INVERSE (0, context->keys, b, b_next); ROUND_INVERSE (7, context->keys, b, b_next); ROUND_INVERSE (6, context->keys, b, b_next); ROUND_INVERSE (5, context->keys, b, b_next); ROUND_INVERSE (4, context->keys, b, b_next); ROUND_INVERSE (3, context->keys, b, b_next); ROUND_INVERSE (2, context->keys, b, b_next); ROUND_INVERSE (1, context->keys, b, b_next); ROUND_INVERSE (0, context->keys, b, b_next); buf_put_le32 (output + 0, b_next[0]); buf_put_le32 (output + 4, b_next[1]); buf_put_le32 (output + 8, b_next[2]); buf_put_le32 (output + 12, b_next[3]); } static unsigned int serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in) { serpent_context_t *context = ctx; serpent_encrypt_internal (context, buffer_in, buffer_out); return /*burn_stack*/ (2 * sizeof (serpent_block_t)); } static unsigned int serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in) { serpent_context_t *context = ctx; serpent_decrypt_internal (context, buffer_in, buffer_out); return /*burn_stack*/ (2 * sizeof (serpent_block_t)); } /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size sizeof(serpent_block_t). */ static void _gcry_serpent_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { serpent_context_t *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char tmpbuf[sizeof(serpent_block_t)]; int burn_stack_depth = 2 * sizeof (serpent_block_t); #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 16; outbuf += 16 * sizeof(serpent_block_t); inbuf += 16 * sizeof(serpent_block_t); did_use_avx2 = 1; } if (did_use_avx2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic/sse2 code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif #ifdef USE_SSE2 { int did_use_sse2 = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_sse2 = 1; } if (did_use_sse2) { /* serpent-sse2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif #ifdef USE_NEON if (ctx->use_neon) { int did_use_neon = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_neon = 1; } if (did_use_neon) { /* serpent-neon assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } #endif for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ serpent_encrypt_internal(ctx, ctr, tmpbuf); /* XOR the input with the encrypted counter and store in output. */ cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t)); outbuf += sizeof(serpent_block_t); inbuf += sizeof(serpent_block_t); /* Increment the counter. */ cipher_block_add(ctr, 1, sizeof(serpent_block_t)); } wipememory(tmpbuf, sizeof(tmpbuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CBC mode. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_serpent_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { serpent_context_t *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char savebuf[sizeof(serpent_block_t)]; int burn_stack_depth = 2 * sizeof (serpent_block_t); #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * sizeof(serpent_block_t); inbuf += 16 * sizeof(serpent_block_t); did_use_avx2 = 1; } if (did_use_avx2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic/sse2 code to handle smaller chunks... */ } #endif #ifdef USE_SSE2 { int did_use_sse2 = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_sse2 = 1; } if (did_use_sse2) { /* serpent-sse2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_NEON if (ctx->use_neon) { int did_use_neon = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_neon = 1; } if (did_use_neon) { /* serpent-neon assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { /* INBUF is needed later and it may be identical to OUTBUF, so store the intermediate result to SAVEBUF. */ serpent_decrypt_internal (ctx, inbuf, savebuf); cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t)); inbuf += sizeof(serpent_block_t); outbuf += sizeof(serpent_block_t); } wipememory(savebuf, sizeof(savebuf)); _gcry_burn_stack(burn_stack_depth); } /* Bulk decryption of complete blocks in CFB mode. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_serpent_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { serpent_context_t *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 2 * sizeof (serpent_block_t); #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; /* Process data in 16 block chunks. */ while (nblocks >= 16) { _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 16; outbuf += 16 * sizeof(serpent_block_t); inbuf += 16 * sizeof(serpent_block_t); did_use_avx2 = 1; } if (did_use_avx2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic/sse2 code to handle smaller chunks... */ } #endif #ifdef USE_SSE2 { int did_use_sse2 = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_sse2 = 1; } if (did_use_sse2) { /* serpent-sse2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_NEON if (ctx->use_neon) { int did_use_neon = 0; /* Process data in 8 block chunks. */ while (nblocks >= 8) { _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_neon = 1; } if (did_use_neon) { /* serpent-neon assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif for ( ;nblocks; nblocks-- ) { serpent_encrypt_internal(ctx, iv, iv); cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t)); outbuf += sizeof(serpent_block_t); inbuf += sizeof(serpent_block_t); } _gcry_burn_stack(burn_stack_depth); } /* Bulk encryption/decryption of complete blocks in OCB mode. */ static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) serpent_context_t *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; int burn_stack_depth = 2 * sizeof (serpent_block_t); u64 blkn = c->u_mode.ocb.data_nblocks; #else (void)c; (void)outbuf_arg; (void)inbuf_arg; (void)encrypt; #endif #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; u64 Ls[16]; unsigned int n = 16 - (blkn % 16); u64 *l; int i; if (nblocks >= 16) { for (i = 0; i < 16; i += 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; l = &Ls[(15 + n) % 16]; /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); if (encrypt) _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 16; outbuf += 16 * sizeof(serpent_block_t); inbuf += 16 * sizeof(serpent_block_t); did_use_avx2 = 1; } } if (did_use_avx2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_SSE2 { int did_use_sse2 = 0; u64 Ls[8]; unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; l = &Ls[(7 + n) % 8]; /* Process data in 8 block chunks. */ while (nblocks >= 8) { blkn += 8; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); if (encrypt) _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_sse2 = 1; } } if (did_use_sse2) { /* serpent-sse2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_NEON if (ctx->use_neon) { int did_use_neon = 0; const void *Ls[8]; unsigned int n = 8 - (blkn % 8); const void **l; if (nblocks >= 8) { Ls[(0 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(1 + n) % 8] = c->u_mode.ocb.L[1]; Ls[(2 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(3 + n) % 8] = c->u_mode.ocb.L[2]; Ls[(4 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(5 + n) % 8] = c->u_mode.ocb.L[1]; Ls[(6 + n) % 8] = c->u_mode.ocb.L[0]; l = &Ls[(7 + n) % 8]; /* Process data in 8 block chunks. */ while (nblocks >= 8) { blkn += 8; *l = ocb_get_l(c, blkn - blkn % 8); if (encrypt) _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); else _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); inbuf += 8 * sizeof(serpent_block_t); did_use_neon = 1; } } if (did_use_neon) { /* serpent-neon assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) c->u_mode.ocb.data_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #endif return nblocks; } /* Bulk authentication of complete blocks in OCB mode. */ static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) serpent_context_t *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; int burn_stack_depth = 2 * sizeof(serpent_block_t); u64 blkn = c->u_mode.ocb.aad_nblocks; #else (void)c; (void)abuf_arg; #endif #ifdef USE_AVX2 if (ctx->use_avx2) { int did_use_avx2 = 0; u64 Ls[16]; unsigned int n = 16 - (blkn % 16); u64 *l; int i; if (nblocks >= 16) { for (i = 0; i < 16; i += 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; } Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; l = &Ls[(15 + n) % 16]; /* Process data in 16 block chunks. */ while (nblocks >= 16) { blkn += 16; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 16; abuf += 16 * sizeof(serpent_block_t); did_use_avx2 = 1; } } if (did_use_avx2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_SSE2 { int did_use_sse2 = 0; u64 Ls[8]; unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { /* Use u64 to store pointers for x32 support (assembly function * assumes 64-bit pointers). */ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; l = &Ls[(7 + n) % 8]; /* Process data in 8 block chunks. */ while (nblocks >= 8) { blkn += 8; *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 8; abuf += 8 * sizeof(serpent_block_t); did_use_sse2 = 1; } } if (did_use_sse2) { /* serpent-avx2 assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #ifdef USE_NEON if (ctx->use_neon) { int did_use_neon = 0; const void *Ls[8]; unsigned int n = 8 - (blkn % 8); const void **l; if (nblocks >= 8) { Ls[(0 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(1 + n) % 8] = c->u_mode.ocb.L[1]; Ls[(2 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(3 + n) % 8] = c->u_mode.ocb.L[2]; Ls[(4 + n) % 8] = c->u_mode.ocb.L[0]; Ls[(5 + n) % 8] = c->u_mode.ocb.L[1]; Ls[(6 + n) % 8] = c->u_mode.ocb.L[0]; l = &Ls[(7 + n) % 8]; /* Process data in 8 block chunks. */ while (nblocks >= 8) { blkn += 8; *l = ocb_get_l(c, blkn - blkn % 8); _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, Ls); nblocks -= 8; abuf += 8 * sizeof(serpent_block_t); did_use_neon = 1; } } if (did_use_neon) { /* serpent-neon assembly code does not use stack */ if (nblocks == 0) burn_stack_depth = 0; } /* Use generic code to handle smaller chunks... */ } #endif #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) c->u_mode.ocb.aad_nblocks = blkn; if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); #endif return nblocks; } /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. */ static const char* selftest_ctr_128 (void) { const int nblocks = 16+8+1; const int blocksize = sizeof(serpent_block_t); const int context_size = sizeof(serpent_context_t); return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey, &serpent_encrypt, nblocks, blocksize, context_size); } /* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cbc_128 (void) { const int nblocks = 16+8+2; const int blocksize = sizeof(serpent_block_t); const int context_size = sizeof(serpent_context_t); return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey, &serpent_encrypt, nblocks, blocksize, context_size); } /* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption. Returns NULL on success. */ static const char* selftest_cfb_128 (void) { const int nblocks = 16+8+2; const int blocksize = sizeof(serpent_block_t); const int context_size = sizeof(serpent_context_t); return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey, &serpent_encrypt, nblocks, blocksize, context_size); } /* Serpent test. */ static const char * serpent_test (void) { serpent_context_t context; unsigned char scratch[16]; unsigned int i; const char *r; static struct test { int key_length; unsigned char key[32]; unsigned char text_plain[16]; unsigned char text_cipher[16]; } test_data[] = { { 16, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E", "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D" }, { 24, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00", "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E", "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9" }, { 32, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E", "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B" }, { 32, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00", "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C" }, { 0 }, }; for (i = 0; test_data[i].key_length; i++) { serpent_setkey_internal (&context, test_data[i].key, test_data[i].key_length); serpent_encrypt_internal (&context, test_data[i].text_plain, scratch); if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t))) switch (test_data[i].key_length) { case 16: return "Serpent-128 test encryption failed."; case 24: return "Serpent-192 test encryption failed."; case 32: return "Serpent-256 test encryption failed."; } serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch); if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t))) switch (test_data[i].key_length) { case 16: return "Serpent-128 test decryption failed."; case 24: return "Serpent-192 test decryption failed."; case 32: return "Serpent-256 test decryption failed."; } } if ( (r = selftest_ctr_128 ()) ) return r; if ( (r = selftest_cbc_128 ()) ) return r; if ( (r = selftest_cfb_128 ()) ) return r; return NULL; } static gcry_cipher_oid_spec_t serpent128_oids[] = { {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB }, {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC }, {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB }, {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB }, { NULL } }; static gcry_cipher_oid_spec_t serpent192_oids[] = { {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB }, {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC }, {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB }, {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB }, { NULL } }; static gcry_cipher_oid_spec_t serpent256_oids[] = { {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB }, {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC }, {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB }, {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB }, { NULL } }; static const char *serpent128_aliases[] = { "SERPENT", "SERPENT-128", NULL }; static const char *serpent192_aliases[] = { "SERPENT-192", NULL }; static const char *serpent256_aliases[] = { "SERPENT-256", NULL }; gcry_cipher_spec_t _gcry_cipher_spec_serpent128 = { GCRY_CIPHER_SERPENT128, {0, 0}, "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128, sizeof (serpent_context_t), serpent_setkey, serpent_encrypt, serpent_decrypt }; gcry_cipher_spec_t _gcry_cipher_spec_serpent192 = { GCRY_CIPHER_SERPENT192, {0, 0}, "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192, sizeof (serpent_context_t), serpent_setkey, serpent_encrypt, serpent_decrypt }; gcry_cipher_spec_t _gcry_cipher_spec_serpent256 = { GCRY_CIPHER_SERPENT256, {0, 0}, "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256, sizeof (serpent_context_t), serpent_setkey, serpent_encrypt, serpent_decrypt };