diff --git a/cipher/blake2.c b/cipher/blake2.c
index bfd24b9f..f2bf49e5 100644
--- a/cipher/blake2.c
+++ b/cipher/blake2.c
@@ -1,973 +1,996 @@
/* blake2.c - BLAKE2b and BLAKE2s hash functions (RFC 7693)
 * Copyright (C) 2017 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

/* The code is based on public-domain/CC0 BLAKE2 reference implementation
 * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/ref
 * Copyright 2012, Samuel Neves
 */

#include <config.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"

/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif

/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif

/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
 * stack to store XMM6-XMM15 needed on Win64.
*/ #undef ASM_FUNC_ABI #undef ASM_EXTRA_STACK #if defined(USE_AVX2) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) # define ASM_FUNC_ABI __attribute__((sysv_abi)) # define ASM_EXTRA_STACK (10 * 16) #else # define ASM_FUNC_ABI # define ASM_EXTRA_STACK 0 #endif #define BLAKE2B_BLOCKBYTES 128 #define BLAKE2B_OUTBYTES 64 #define BLAKE2B_KEYBYTES 64 #define BLAKE2S_BLOCKBYTES 64 #define BLAKE2S_OUTBYTES 32 #define BLAKE2S_KEYBYTES 32 typedef struct { u64 h[8]; u64 t[2]; u64 f[2]; } BLAKE2B_STATE; struct blake2b_param_s { byte digest_length; byte key_length; byte fanout; byte depth; byte leaf_length[4]; byte node_offset[4]; byte xof_length[4]; byte node_depth; byte inner_length; byte reserved[14]; byte salt[16]; byte personal[16]; }; typedef struct BLAKE2B_CONTEXT_S { BLAKE2B_STATE state; byte buf[BLAKE2B_BLOCKBYTES]; size_t buflen; size_t outlen; #ifdef USE_AVX2 unsigned int use_avx2:1; #endif } BLAKE2B_CONTEXT; typedef struct { u32 h[8]; u32 t[2]; u32 f[2]; } BLAKE2S_STATE; struct blake2s_param_s { byte digest_length; byte key_length; byte fanout; byte depth; byte leaf_length[4]; byte node_offset[4]; byte xof_length[2]; byte node_depth; byte inner_length; /* byte reserved[0]; */ byte salt[8]; byte personal[8]; }; typedef struct BLAKE2S_CONTEXT_S { BLAKE2S_STATE state; byte buf[BLAKE2S_BLOCKBYTES]; size_t buflen; size_t outlen; #ifdef USE_AVX unsigned int use_avx:1; #endif } BLAKE2S_CONTEXT; typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk, size_t nblks); static const u64 blake2b_IV[8] = { U64_C(0x6a09e667f3bcc908), U64_C(0xbb67ae8584caa73b), U64_C(0x3c6ef372fe94f82b), U64_C(0xa54ff53a5f1d36f1), U64_C(0x510e527fade682d1), U64_C(0x9b05688c2b3e6c1f), U64_C(0x1f83d9abfb41bd6b), U64_C(0x5be0cd19137e2179) }; static const u32 blake2s_IV[8] = { 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; static byte zero_block[BLAKE2B_BLOCKBYTES] = { 0, }; static void blake2_write(void *S, const void *inbuf, size_t inlen, byte *tmpbuf, size_t *tmpbuflen, size_t blkbytes, blake2_transform_t transform_fn) { const byte* in = inbuf; unsigned int burn = 0; if (inlen > 0) { size_t left = *tmpbuflen; size_t fill = blkbytes - left; size_t nblks; if (inlen > fill) { if (fill > 0) buf_cpy (tmpbuf + left, in, fill); /* Fill buffer */ left = 0; burn = transform_fn (S, tmpbuf, 1); /* Increment counter + Compress */ in += fill; inlen -= fill; nblks = inlen / blkbytes - !(inlen % blkbytes); if (nblks) { burn = transform_fn(S, in, nblks); in += blkbytes * nblks; inlen -= blkbytes * nblks; } } gcry_assert (inlen > 0); buf_cpy (tmpbuf + left, in, inlen); *tmpbuflen = left + inlen; } if (burn) _gcry_burn_stack (burn); return; } static inline void blake2b_set_lastblock(BLAKE2B_STATE *S) { S->f[0] = U64_C(0xffffffffffffffff); } static inline int blake2b_is_lastblock(const BLAKE2B_STATE *S) { return S->f[0] != 0; } static inline void blake2b_increment_counter(BLAKE2B_STATE *S, const int inc) { S->t[0] += (u64)inc; S->t[1] += (S->t[0] < (u64)inc) - (inc < 0); } static inline u64 rotr64(u64 x, u64 n) { return ((x >> (n & 63)) | (x << ((64 - n) & 63))); } static unsigned int blake2b_transform_generic(BLAKE2B_STATE *S, const void *inblks, size_t nblks) { static const byte blake2b_sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, { 9, 0, 5, 7, 2, 4, 10, 
15, 14, 1, 11, 12, 6, 8, 3, 13 }, { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; const byte* in = inblks; u64 m[16]; u64 v[16]; while (nblks--) { /* Increment counter */ blake2b_increment_counter (S, BLAKE2B_BLOCKBYTES); /* Compress */ m[0] = buf_get_le64 (in + 0 * sizeof(m[0])); m[1] = buf_get_le64 (in + 1 * sizeof(m[0])); m[2] = buf_get_le64 (in + 2 * sizeof(m[0])); m[3] = buf_get_le64 (in + 3 * sizeof(m[0])); m[4] = buf_get_le64 (in + 4 * sizeof(m[0])); m[5] = buf_get_le64 (in + 5 * sizeof(m[0])); m[6] = buf_get_le64 (in + 6 * sizeof(m[0])); m[7] = buf_get_le64 (in + 7 * sizeof(m[0])); m[8] = buf_get_le64 (in + 8 * sizeof(m[0])); m[9] = buf_get_le64 (in + 9 * sizeof(m[0])); m[10] = buf_get_le64 (in + 10 * sizeof(m[0])); m[11] = buf_get_le64 (in + 11 * sizeof(m[0])); m[12] = buf_get_le64 (in + 12 * sizeof(m[0])); m[13] = buf_get_le64 (in + 13 * sizeof(m[0])); m[14] = buf_get_le64 (in + 14 * sizeof(m[0])); m[15] = buf_get_le64 (in + 15 * sizeof(m[0])); v[ 0] = S->h[0]; v[ 1] = S->h[1]; v[ 2] = S->h[2]; v[ 3] = S->h[3]; v[ 4] = S->h[4]; v[ 5] = S->h[5]; v[ 6] = S->h[6]; v[ 7] = S->h[7]; v[ 8] = blake2b_IV[0]; v[ 9] = blake2b_IV[1]; v[10] = blake2b_IV[2]; v[11] = blake2b_IV[3]; v[12] = blake2b_IV[4] ^ S->t[0]; v[13] = blake2b_IV[5] ^ S->t[1]; v[14] = blake2b_IV[6] ^ S->f[0]; v[15] = blake2b_IV[7] ^ S->f[1]; #define G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ d = rotr64(d ^ a, 32); \ c = c + d; \ b = rotr64(b ^ c, 24); \ a = a + b + m[blake2b_sigma[r][2*i+1]]; \ d = rotr64(d ^ a, 16); \ c = c + d; \ b = rotr64(b ^ c, 63); \ } while(0) #define ROUND(r) \ do { \ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ G(r,2,v[ 2],v[ 6],v[10],v[14]); \ G(r,3,v[ 3],v[ 7],v[11],v[15]); \ G(r,4,v[ 0],v[ 5],v[10],v[15]); \ G(r,5,v[ 1],v[ 6],v[11],v[12]); \ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) ROUND(0); ROUND(1); ROUND(2); ROUND(3); ROUND(4); ROUND(5); ROUND(6); ROUND(7); ROUND(8); ROUND(9); ROUND(10); ROUND(11); #undef G #undef ROUND S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; in += BLAKE2B_BLOCKBYTES; } return sizeof(void *) * 4 + sizeof(u64) * 16 * 2; } #ifdef USE_AVX2 unsigned int _gcry_blake2b_transform_amd64_avx2(BLAKE2B_STATE *S, const void *inblks, size_t nblks) ASM_FUNC_ABI; #endif static unsigned int blake2b_transform(void *ctx, const void *inblks, size_t nblks) { BLAKE2B_CONTEXT *c = ctx; unsigned int nburn; if (0) {} #ifdef USE_AVX2 if (c->use_avx2) nburn = _gcry_blake2b_transform_amd64_avx2(&c->state, inblks, nblks); #endif else nburn = blake2b_transform_generic(&c->state, inblks, nblks); if (nburn) nburn += ASM_EXTRA_STACK; return nburn; } static void blake2b_final(void *ctx) { BLAKE2B_CONTEXT *c = ctx; BLAKE2B_STATE *S = &c->state; unsigned int burn; size_t i; gcry_assert (sizeof(c->buf) >= c->outlen); if (blake2b_is_lastblock(S)) return; if (c->buflen < BLAKE2B_BLOCKBYTES) memset (c->buf + c->buflen, 0, 
BLAKE2B_BLOCKBYTES - c->buflen); /* Padding */ blake2b_set_lastblock (S); blake2b_increment_counter (S, (int)c->buflen - BLAKE2B_BLOCKBYTES); burn = blake2b_transform (ctx, c->buf, 1); /* Output full hash to buffer */ for (i = 0; i < 8; ++i) buf_put_le64 (c->buf + sizeof(S->h[i]) * i, S->h[i]); /* Zero out extra buffer bytes. */ if (c->outlen < sizeof(c->buf)) memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); if (burn) _gcry_burn_stack (burn); } static byte *blake2b_read(void *ctx) { BLAKE2B_CONTEXT *c = ctx; return c->buf; } static void blake2b_write(void *ctx, const void *inbuf, size_t inlen) { BLAKE2B_CONTEXT *c = ctx; BLAKE2B_STATE *S = &c->state; blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2B_BLOCKBYTES, blake2b_transform); } static inline void blake2b_init_param(BLAKE2B_STATE *S, const struct blake2b_param_s *P) { const byte *p = (const byte *)P; size_t i; /* init xors IV with input parameter block */ /* IV XOR ParamBlock */ for (i = 0; i < 8; ++i) S->h[i] = blake2b_IV[i] ^ buf_get_le64(p + sizeof(S->h[i]) * i); } static inline gcry_err_code_t blake2b_init(BLAKE2B_CONTEXT *ctx, const byte *key, size_t keylen) { struct blake2b_param_s P[1] = { { 0, } }; BLAKE2B_STATE *S = &ctx->state; if (!ctx->outlen || ctx->outlen > BLAKE2B_OUTBYTES) return GPG_ERR_INV_ARG; if (sizeof(P[0]) != sizeof(u64) * 8) return GPG_ERR_INTERNAL; if (keylen && (!key || keylen > BLAKE2B_KEYBYTES)) return GPG_ERR_INV_KEYLEN; P->digest_length = ctx->outlen; P->key_length = keylen; P->fanout = 1; P->depth = 1; blake2b_init_param (S, P); wipememory (P, sizeof(P)); if (key) { blake2b_write (ctx, key, keylen); blake2b_write (ctx, zero_block, BLAKE2B_BLOCKBYTES - keylen); } return 0; } static gcry_err_code_t blake2b_init_ctx(void *ctx, unsigned int flags, const byte *key, size_t keylen, unsigned int dbits) { BLAKE2B_CONTEXT *c = ctx; unsigned int features = _gcry_get_hw_features (); (void)features; (void)flags; memset (c, 0, sizeof (*c)); #ifdef USE_AVX2 c->use_avx2 = !!(features & HWF_INTEL_AVX2); #endif c->outlen = dbits / 8; c->buflen = 0; return blake2b_init(c, key, keylen); } static inline void blake2s_set_lastblock(BLAKE2S_STATE *S) { S->f[0] = 0xFFFFFFFFUL; } static inline int blake2s_is_lastblock(BLAKE2S_STATE *S) { return S->f[0] != 0; } static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc) { S->t[0] += (u32)inc; S->t[1] += (S->t[0] < (u32)inc) - (inc < 0); } static unsigned int blake2s_transform_generic(BLAKE2S_STATE *S, const void *inblks, size_t nblks) { static const byte blake2s_sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, }; unsigned int burn = 0; const byte* in = inblks; u32 m[16]; u32 v[16]; while (nblks--) { /* Increment counter */ blake2s_increment_counter (S, BLAKE2S_BLOCKBYTES); /* Compress */ m[0] = buf_get_le32 (in + 0 * sizeof(m[0])); m[1] = buf_get_le32 (in + 1 * sizeof(m[0])); m[2] = buf_get_le32 (in + 2 * sizeof(m[0])); m[3] = buf_get_le32 (in + 3 * sizeof(m[0])); m[4] = buf_get_le32 (in + 4 * 
sizeof(m[0])); m[5] = buf_get_le32 (in + 5 * sizeof(m[0])); m[6] = buf_get_le32 (in + 6 * sizeof(m[0])); m[7] = buf_get_le32 (in + 7 * sizeof(m[0])); m[8] = buf_get_le32 (in + 8 * sizeof(m[0])); m[9] = buf_get_le32 (in + 9 * sizeof(m[0])); m[10] = buf_get_le32 (in + 10 * sizeof(m[0])); m[11] = buf_get_le32 (in + 11 * sizeof(m[0])); m[12] = buf_get_le32 (in + 12 * sizeof(m[0])); m[13] = buf_get_le32 (in + 13 * sizeof(m[0])); m[14] = buf_get_le32 (in + 14 * sizeof(m[0])); m[15] = buf_get_le32 (in + 15 * sizeof(m[0])); v[ 0] = S->h[0]; v[ 1] = S->h[1]; v[ 2] = S->h[2]; v[ 3] = S->h[3]; v[ 4] = S->h[4]; v[ 5] = S->h[5]; v[ 6] = S->h[6]; v[ 7] = S->h[7]; v[ 8] = blake2s_IV[0]; v[ 9] = blake2s_IV[1]; v[10] = blake2s_IV[2]; v[11] = blake2s_IV[3]; v[12] = S->t[0] ^ blake2s_IV[4]; v[13] = S->t[1] ^ blake2s_IV[5]; v[14] = S->f[0] ^ blake2s_IV[6]; v[15] = S->f[1] ^ blake2s_IV[7]; #define G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2s_sigma[r][2*i+0]]; \ d = ror(d ^ a, 16); \ c = c + d; \ b = ror(b ^ c, 12); \ a = a + b + m[blake2s_sigma[r][2*i+1]]; \ d = ror(d ^ a, 8); \ c = c + d; \ b = ror(b ^ c, 7); \ } while(0) #define ROUND(r) \ do { \ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ G(r,2,v[ 2],v[ 6],v[10],v[14]); \ G(r,3,v[ 3],v[ 7],v[11],v[15]); \ G(r,4,v[ 0],v[ 5],v[10],v[15]); \ G(r,5,v[ 1],v[ 6],v[11],v[12]); \ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) ROUND(0); ROUND(1); ROUND(2); ROUND(3); ROUND(4); ROUND(5); ROUND(6); ROUND(7); ROUND(8); ROUND(9); #undef G #undef ROUND S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; in += BLAKE2S_BLOCKBYTES; } return burn; } #ifdef USE_AVX unsigned int _gcry_blake2s_transform_amd64_avx(BLAKE2S_STATE *S, const void *inblks, size_t nblks) ASM_FUNC_ABI; #endif static unsigned int blake2s_transform(void *ctx, const void *inblks, size_t nblks) { BLAKE2S_CONTEXT *c = ctx; unsigned int nburn; if (0) {} #ifdef USE_AVX if (c->use_avx) nburn = _gcry_blake2s_transform_amd64_avx(&c->state, inblks, nblks); #endif else nburn = blake2s_transform_generic(&c->state, inblks, nblks); if (nburn) nburn += ASM_EXTRA_STACK; return nburn; } static void blake2s_final(void *ctx) { BLAKE2S_CONTEXT *c = ctx; BLAKE2S_STATE *S = &c->state; unsigned int burn; size_t i; gcry_assert (sizeof(c->buf) >= c->outlen); if (blake2s_is_lastblock(S)) return; if (c->buflen < BLAKE2S_BLOCKBYTES) memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding */ blake2s_set_lastblock (S); blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES); burn = blake2s_transform (ctx, c->buf, 1); /* Output full hash to buffer */ for (i = 0; i < 8; ++i) buf_put_le32 (c->buf + sizeof(S->h[i]) * i, S->h[i]); /* Zero out extra buffer bytes. 
*/ if (c->outlen < sizeof(c->buf)) memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); if (burn) _gcry_burn_stack (burn); } static byte *blake2s_read(void *ctx) { BLAKE2S_CONTEXT *c = ctx; return c->buf; } static void blake2s_write(void *ctx, const void *inbuf, size_t inlen) { BLAKE2S_CONTEXT *c = ctx; BLAKE2S_STATE *S = &c->state; blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2S_BLOCKBYTES, blake2s_transform); } static inline void blake2s_init_param(BLAKE2S_STATE *S, const struct blake2s_param_s *P) { const byte *p = (const byte *)P; size_t i; /* init2 xors IV with input parameter block */ /* IV XOR ParamBlock */ for (i = 0; i < 8; ++i) S->h[i] ^= blake2s_IV[i] ^ buf_get_le32(&p[i * 4]); } static inline gcry_err_code_t blake2s_init(BLAKE2S_CONTEXT *ctx, const byte *key, size_t keylen) { struct blake2s_param_s P[1] = { { 0, } }; BLAKE2S_STATE *S = &ctx->state; if (!ctx->outlen || ctx->outlen > BLAKE2S_OUTBYTES) return GPG_ERR_INV_ARG; if (sizeof(P[0]) != sizeof(u32) * 8) return GPG_ERR_INTERNAL; if (keylen && (!key || keylen > BLAKE2S_KEYBYTES)) return GPG_ERR_INV_KEYLEN; P->digest_length = ctx->outlen; P->key_length = keylen; P->fanout = 1; P->depth = 1; blake2s_init_param (S, P); wipememory (P, sizeof(P)); if (key) { blake2s_write (ctx, key, keylen); blake2s_write (ctx, zero_block, BLAKE2S_BLOCKBYTES - keylen); } return 0; } static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags, const byte *key, size_t keylen, unsigned int dbits) { BLAKE2S_CONTEXT *c = ctx; unsigned int features = _gcry_get_hw_features (); (void)features; (void)flags; memset (c, 0, sizeof (*c)); #ifdef USE_AVX c->use_avx = !!(features & HWF_INTEL_AVX); #endif c->outlen = dbits / 8; c->buflen = 0; return blake2s_init(c, key, keylen); } /* Selftests from "RFC 7693, Appendix E. BLAKE2b and BLAKE2s Self-Test * Module C Source". 
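[Not part of the patch, a caller-side sketch: the keyed setup above (blake2b_init/blake2s_init absorb the key padded to a full input block) is reachable from applications through gcry_md_setkey, which for the BLAKE2 algorithms routes into _gcry_blake2_init_with_key. That routing is my reading of md.c, not shown in this diff; error handling is abbreviated.]

/* Keyed BLAKE2b-256 via the public md API -- illustrative sketch only,
 * not part of this patch.  Assumes libgcrypt with BLAKE2 support.  */
#include <stdio.h>
#include <string.h>
#include <gcrypt.h>

int
main (void)
{
  gcry_md_hd_t hd;
  const char key[] = "0123456789abcdef";   /* any length up to 64 bytes */
  const char msg[] = "sample message";
  unsigned char *digest;
  int i;

  gcry_check_version (NULL);
  if (gcry_md_open (&hd, GCRY_MD_BLAKE2B_256, 0))
    return 1;
  /* Key the hash; internally this re-initializes the BLAKE2 state with
   * the key block, as in blake2b_init()/blake2s_init() above.  */
  if (gcry_md_setkey (hd, key, strlen (key)))
    return 1;
  gcry_md_write (hd, msg, strlen (msg));
  digest = gcry_md_read (hd, GCRY_MD_BLAKE2B_256);
  for (i = 0; i < 32; i++)
    printf ("%02x", digest[i]);
  printf ("\n");
  gcry_md_close (hd);
  return 0;
}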
*/ static void selftest_seq(byte *out, size_t len, u32 seed) { size_t i; u32 t, a, b; a = 0xDEAD4BAD * seed; b = 1; for (i = 0; i < len; i++) { t = a + b; a = b; b = t; out[i] = (t >> 24) & 0xFF; } } static gpg_err_code_t selftests_blake2b (int algo, int extended, selftest_report_func_t report) { static const byte blake2b_res[32] = { 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56, 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73, 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75 }; static const size_t b2b_md_len[4] = { 20, 32, 48, 64 }; static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 }; size_t i, j, outlen, inlen; byte in[1024], key[64]; BLAKE2B_CONTEXT ctx; BLAKE2B_CONTEXT ctx2; const char *what; const char *errtxt; (void)extended; what = "rfc7693 BLAKE2b selftest"; /* 256-bit hash for testing */ if (blake2b_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) { errtxt = "init failed"; goto failed; } for (i = 0; i < 4; i++) { outlen = b2b_md_len[i]; for (j = 0; j < 6; j++) { inlen = b2b_in_len[j]; selftest_seq(in, inlen, inlen); /* unkeyed hash */ blake2b_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); blake2b_write(&ctx2, in, inlen); blake2b_final(&ctx2); blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ selftest_seq(key, outlen, outlen); /* keyed hash */ blake2b_init_ctx(&ctx2, 0, key, outlen, outlen * 8); blake2b_write(&ctx2, in, inlen); blake2b_final(&ctx2); blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ } } /* compute and compare the hash of hashes */ blake2b_final(&ctx); for (i = 0; i < 32; i++) { if (ctx.buf[i] != blake2b_res[i]) { errtxt = "digest mismatch"; goto failed; } } return 0; failed: if (report) report ("digest", algo, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } static gpg_err_code_t selftests_blake2s (int algo, int extended, selftest_report_func_t report) { static const byte blake2s_res[32] = { 0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD, 0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC, 0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87, 0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE }; static const size_t b2s_md_len[4] = { 16, 20, 28, 32 }; static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 }; size_t i, j, outlen, inlen; byte in[1024], key[32]; BLAKE2S_CONTEXT ctx; BLAKE2S_CONTEXT ctx2; const char *what; const char *errtxt; (void)extended; what = "rfc7693 BLAKE2s selftest"; /* 256-bit hash for testing */ if (blake2s_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) { errtxt = "init failed"; goto failed; } for (i = 0; i < 4; i++) { outlen = b2s_md_len[i]; for (j = 0; j < 6; j++) { inlen = b2s_in_len[j]; selftest_seq(in, inlen, inlen); /* unkeyed hash */ blake2s_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); blake2s_write(&ctx2, in, inlen); blake2s_final(&ctx2); blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ selftest_seq(key, outlen, outlen); /* keyed hash */ blake2s_init_ctx(&ctx2, 0, key, outlen, outlen * 8); blake2s_write(&ctx2, in, inlen); blake2s_final(&ctx2); blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ } } /* compute and compare the hash of hashes */ blake2s_final(&ctx); for (i = 0; i < 32; i++) { if (ctx.buf[i] != blake2s_res[i]) { errtxt = "digest mismatch"; goto failed; } } return 0; failed: if (report) report ("digest", algo, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags, const unsigned char *key, size_t keylen, int algo) { gcry_err_code_t rc; switch (algo) { case GCRY_MD_BLAKE2B_512: rc = blake2b_init_ctx (ctx, 
flags, key, keylen, 512); break; case GCRY_MD_BLAKE2B_384: rc = blake2b_init_ctx (ctx, flags, key, keylen, 384); break; case GCRY_MD_BLAKE2B_256: rc = blake2b_init_ctx (ctx, flags, key, keylen, 256); break; case GCRY_MD_BLAKE2B_160: rc = blake2b_init_ctx (ctx, flags, key, keylen, 160); break; case GCRY_MD_BLAKE2S_256: rc = blake2s_init_ctx (ctx, flags, key, keylen, 256); break; case GCRY_MD_BLAKE2S_224: rc = blake2s_init_ctx (ctx, flags, key, keylen, 224); break; case GCRY_MD_BLAKE2S_160: rc = blake2s_init_ctx (ctx, flags, key, keylen, 160); break; case GCRY_MD_BLAKE2S_128: rc = blake2s_init_ctx (ctx, flags, key, keylen, 128); break; default: rc = GPG_ERR_DIGEST_ALGO; break; } return rc; } #define DEFINE_BLAKE2_VARIANT(bs, BS, dbits, oid_branch) \ static void blake2##bs##_##dbits##_init(void *ctx, unsigned int flags) \ { \ int err = blake2##bs##_init_ctx (ctx, flags, NULL, 0, dbits); \ gcry_assert (err == 0); \ } \ + static void \ + _gcry_blake2##bs##_##dbits##_hash_buffer(void *outbuf, \ + const void *buffer, size_t length) \ + { \ + BLAKE2##BS##_CONTEXT hd; \ + blake2##bs##_##dbits##_init (&hd, 0); \ + blake2##bs##_write (&hd, buffer, length); \ + blake2##bs##_final (&hd); \ + memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \ + } \ + static void \ + _gcry_blake2##bs##_##dbits##_hash_buffers(void *outbuf, \ + const gcry_buffer_t *iov, int iovcnt) \ + { \ + BLAKE2##BS##_CONTEXT hd; \ + blake2##bs##_##dbits##_init (&hd, 0); \ + for (;iovcnt > 0; iov++, iovcnt--) \ + blake2##bs##_write (&hd, (const char*)iov[0].data + iov[0].off, \ + iov[0].len); \ + blake2##bs##_final (&hd); \ + memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \ + } \ static byte blake2##bs##_##dbits##_asn[] = { 0x30 }; \ static gcry_md_oid_spec_t oid_spec_blake2##bs##_##dbits[] = \ { \ { " 1.3.6.1.4.1.1722.12.2." oid_branch }, \ { NULL } \ }; \ gcry_md_spec_t _gcry_digest_spec_blake2##bs##_##dbits = \ { \ GCRY_MD_BLAKE2##BS##_##dbits, {0, 0}, \ "BLAKE2" #BS "_" #dbits, blake2##bs##_##dbits##_asn, \ DIM (blake2##bs##_##dbits##_asn), oid_spec_blake2##bs##_##dbits, \ dbits / 8, blake2##bs##_##dbits##_init, blake2##bs##_write, \ blake2##bs##_final, blake2##bs##_read, NULL, \ - NULL, NULL, \ + _gcry_blake2##bs##_##dbits##_hash_buffer, \ + _gcry_blake2##bs##_##dbits##_hash_buffers, \ sizeof (BLAKE2##BS##_CONTEXT), selftests_blake2##bs \ }; DEFINE_BLAKE2_VARIANT(b, B, 512, "1.16") DEFINE_BLAKE2_VARIANT(b, B, 384, "1.12") DEFINE_BLAKE2_VARIANT(b, B, 256, "1.8") DEFINE_BLAKE2_VARIANT(b, B, 160, "1.5") DEFINE_BLAKE2_VARIANT(s, S, 256, "2.8") DEFINE_BLAKE2_VARIANT(s, S, 224, "2.7") DEFINE_BLAKE2_VARIANT(s, S, 160, "2.5") DEFINE_BLAKE2_VARIANT(s, S, 128, "2.4") diff --git a/cipher/keccak.c b/cipher/keccak.c index db67d071..24963f12 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -1,1272 +1,1354 @@ /* keccak.c - SHA3 hash functions * Copyright (C) 2015 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
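[Not part of the patch, a caller-side sketch: with the hash_buffer/hash_buffers slots of each gcry_md_spec_t now filled in by DEFINE_BLAKE2_VARIANT above, the public one-shot helpers can reach these direct paths instead of setting up a full message-digest handle. The sketch below uses the documented gcry_md_hash_buffers API; buffer names and contents are illustrative.]

/* One-shot hashing over a scatter/gather list -- illustrative only. */
#include <stdio.h>
#include <string.h>
#include <gcrypt.h>

int
main (void)
{
  gcry_buffer_t iov[2];
  unsigned char digest[32];            /* BLAKE2b-256 -> 32 bytes */
  const char part1[] = "hello, ";
  const char part2[] = "world";
  int i;

  memset (iov, 0, sizeof iov);         /* zero 'size' and 'off' fields */
  iov[0].data = (void *)part1;  iov[0].len = strlen (part1);
  iov[1].data = (void *)part2;  iov[1].len = strlen (part2);

  gcry_check_version (NULL);
  if (gcry_md_hash_buffers (GCRY_MD_BLAKE2B_256, 0, digest, iov, 2))
    return 1;
  for (i = 0; i < 32; i++)
    printf ("%02x", digest[i]);
  printf ("\n");
  return 0;
}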
*/ #include #include #include "g10lib.h" #include "bithelp.h" #include "bufhelp.h" #include "cipher.h" #include "hash-common.h" /* USE_64BIT indicates whether to use 64-bit generic implementation. * USE_32BIT indicates whether to use 32-bit generic implementation. */ #undef USE_64BIT #if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8 # define USE_64BIT 1 #else # define USE_32BIT 1 #endif /* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */ #undef USE_64BIT_BMI2 #if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) # define USE_64BIT_BMI2 1 #endif /* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */ #undef USE_64BIT_SHLD #if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) # define USE_64BIT_SHLD 1 #endif /* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */ #undef USE_32BIT_BMI2 #if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) # define USE_32BIT_BMI2 1 #endif /* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly * code. */ #undef USE_64BIT_ARM_NEON #ifdef ENABLE_NEON_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_NEON) # define USE_64BIT_ARM_NEON 1 # endif #endif /*ENABLE_NEON_SUPPORT*/ #if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) # define NEED_COMMON64 1 #endif #ifdef USE_32BIT # define NEED_COMMON32BI 1 #endif #define SHA3_DELIMITED_SUFFIX 0x06 #define SHAKE_DELIMITED_SUFFIX 0x1F typedef struct { union { #ifdef NEED_COMMON64 u64 state64[25]; #endif #ifdef NEED_COMMON32BI u32 state32bi[50]; #endif } u; } KECCAK_STATE; typedef struct { unsigned int (*permute)(KECCAK_STATE *hd); unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes, unsigned int nlanes, int blocklanes); unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen); } keccak_ops_t; typedef struct KECCAK_CONTEXT_S { KECCAK_STATE state; unsigned int outlen; unsigned int blocksize; unsigned int count; unsigned int suffix; const keccak_ops_t *ops; } KECCAK_CONTEXT; #ifdef NEED_COMMON64 const u64 _gcry_keccak_round_consts_64bit[24 + 1] = { U64_C(0x0000000000000001), U64_C(0x0000000000008082), U64_C(0x800000000000808A), U64_C(0x8000000080008000), U64_C(0x000000000000808B), U64_C(0x0000000080000001), U64_C(0x8000000080008081), U64_C(0x8000000000008009), U64_C(0x000000000000008A), U64_C(0x0000000000000088), U64_C(0x0000000080008009), U64_C(0x000000008000000A), U64_C(0x000000008000808B), U64_C(0x800000000000008B), U64_C(0x8000000000008089), U64_C(0x8000000000008003), U64_C(0x8000000000008002), U64_C(0x8000000000000080), U64_C(0x000000000000800A), U64_C(0x800000008000000A), U64_C(0x8000000080008081), U64_C(0x8000000000008080), U64_C(0x0000000080000001), U64_C(0x8000000080008008), U64_C(0xFFFFFFFFFFFFFFFF) }; static unsigned int keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
*/ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { u64 tmp = hd->u.state64[i]; buf_put_le64(outbuf, tmp); outbuf += 8; } return 0; } #endif /* NEED_COMMON64 */ #ifdef NEED_COMMON32BI static const u32 round_consts_32bit[2 * 24] = { 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL, 0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL, 0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL, 0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL, 0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL, 0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL, 0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL, 0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL, 0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL, 0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL, 0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL, 0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL }; static unsigned int keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; t = (x0 & 0x0000FFFFUL) + (x1 << 16); x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); x0 = t; t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); buf_put_le32(&outbuf[0], x0); buf_put_le32(&outbuf[4], x1); outbuf += 8; } return 0; } static inline void keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) { u32 t; t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); } #endif /* NEED_COMMON32BI */ /* Construct generic 64-bit implementation. 
*/ #ifdef USE_64BIT #if __GNUC__ >= 4 && defined(__x86_64__) static inline void absorb_lanes64_8(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "movdqu 1*16(%[dst]), %%xmm1\n\t" "movdqu 1*16(%[in]), %%xmm5\n\t" "movdqu 2*16(%[dst]), %%xmm2\n\t" "movdqu 3*16(%[dst]), %%xmm3\n\t" "pxor %%xmm4, %%xmm0\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu 2*16(%[in]), %%xmm4\n\t" "movdqu 3*16(%[in]), %%xmm5\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" "pxor %%xmm4, %%xmm2\n\t" "movdqu %%xmm1, 1*16(%[dst])\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm2, 2*16(%[dst])\n\t" "movdqu %%xmm3, 3*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); } static inline void absorb_lanes64_4(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "movdqu 1*16(%[dst]), %%xmm1\n\t" "movdqu 1*16(%[in]), %%xmm5\n\t" "pxor %%xmm4, %%xmm0\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" "movdqu %%xmm1, 1*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); } static inline void absorb_lanes64_2(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm4", "memory"); } #else /* __x86_64__ */ static inline void absorb_lanes64_8(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); dst[2] ^= buf_get_le64(in + 8 * 2); dst[3] ^= buf_get_le64(in + 8 * 3); dst[4] ^= buf_get_le64(in + 8 * 4); dst[5] ^= buf_get_le64(in + 8 * 5); dst[6] ^= buf_get_le64(in + 8 * 6); dst[7] ^= buf_get_le64(in + 8 * 7); } static inline void absorb_lanes64_4(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); dst[2] ^= buf_get_le64(in + 8 * 2); dst[3] ^= buf_get_le64(in + 8 * 3); } static inline void absorb_lanes64_2(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); } #endif /* !__x86_64__ */ static inline void absorb_lanes64_1(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); } # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ ((x) >> ((64 - (unsigned int)(n)) & 63))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_generic64_ops = { .permute = keccak_f1600_state_permute64, .absorb = keccak_absorb_lanes64, .extract = keccak_extract64, }; #endif /* USE_64BIT */ /* Construct 64-bit Intel SHLD implementation. 
*/ #ifdef USE_64BIT_SHLD # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) ({ \ u64 tmp = (x); \ asm ("shldq %1, %0, %0" \ : "+r" (tmp) \ : "J" ((n) & 63) \ : "cc"); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_shld_64_ops = { .permute = keccak_f1600_state_permute64_shld, .absorb = keccak_absorb_lanes64_shld, .extract = keccak_extract64, }; #endif /* USE_64BIT_SHLD */ /* Construct 64-bit Intel BMI2 implementation. */ #ifdef USE_64BIT_BMI2 # define ANDN64(x, y) ({ \ u64 tmp; \ asm ("andnq %2, %1, %0" \ : "=r" (tmp) \ : "r0" (x), "rm" (y)); \ tmp; }) # define ROL64(x, n) ({ \ u64 tmp; \ asm ("rorxq %2, %1, %0" \ : "=r" (tmp) \ : "rm0" (x), "J" (64 - ((n) & 63))); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_bmi2_64_ops = { .permute = keccak_f1600_state_permute64_bmi2, .absorb = keccak_absorb_lanes64_bmi2, .extract = keccak_extract64, }; #endif /* USE_64BIT_BMI2 */ /* 64-bit ARMv7/NEON implementation. */ #ifdef USE_64BIT_ARM_NEON unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, const byte *lanes, unsigned int nlanes, int blocklanes); static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) { return _gcry_keccak_permute_armv7_neon(hd->u.state64); } static unsigned int keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, unsigned int nlanes, int blocklanes) { if (blocklanes < 0) { /* blocklanes == -1, permutationless absorb from keccak_final. */ while (nlanes) { hd->u.state64[pos] ^= buf_get_le64(lanes); lanes += 8; nlanes--; } return 0; } else { return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, nlanes, blocklanes); } } static const keccak_ops_t keccak_armv7_neon_64_ops = { .permute = keccak_permute64_armv7_neon, .absorb = keccak_absorb_lanes64_armv7_neon, .extract = keccak_extract64, }; #endif /* USE_64BIT_ARM_NEON */ /* Construct generic 32-bit implementation. */ #ifdef USE_32BIT # define ANDN32(x, y) (~(x) & (y)) # define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \ ((x) >> ((32 - (unsigned int)(n)) & 31))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi # include "keccak_permute_32.h" # undef ANDN32 # undef ROL32 # undef KECCAK_F1600_PERMUTE_FUNC_NAME static unsigned int keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes, unsigned int nlanes, int blocklanes) { unsigned int burn = 0; while (nlanes) { keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2], buf_get_le32(lanes + 0), buf_get_le32(lanes + 4)); lanes += 8; nlanes--; if (++pos == blocklanes) { burn = keccak_f1600_state_permute32bi(hd); pos = 0; } } return burn; } static const keccak_ops_t keccak_generic32bi_ops = { .permute = keccak_f1600_state_permute32bi, .absorb = keccak_absorb_lanes32bi, .extract = keccak_extract32bi, }; #endif /* USE_32BIT */ /* Construct 32-bit Intel BMI2 implementation. 
*/ #ifdef USE_32BIT_BMI2 # define ANDN32(x, y) ({ \ u32 tmp; \ asm ("andnl %2, %1, %0" \ : "=r" (tmp) \ : "r0" (x), "rm" (y)); \ tmp; }) # define ROL32(x, n) ({ \ u32 tmp; \ asm ("rorxl %2, %1, %0" \ : "=r" (tmp) \ : "rm0" (x), "J" (32 - ((n) & 31))); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2 # include "keccak_permute_32.h" # undef ANDN32 # undef ROL32 # undef KECCAK_F1600_PERMUTE_FUNC_NAME static inline u32 pext(u32 x, u32 mask) { u32 tmp; asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); return tmp; } static inline u32 pdep(u32 x, u32 mask) { u32 tmp; asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); return tmp; } static inline void keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1) { x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16); x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16); lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); } static unsigned int keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, unsigned int nlanes, int blocklanes) { unsigned int burn = 0; while (nlanes) { keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2], buf_get_le32(lanes + 0), buf_get_le32(lanes + 4)); lanes += 8; nlanes--; if (++pos == blocklanes) { burn = keccak_f1600_state_permute32bi_bmi2(hd); pos = 0; } } return burn; } static unsigned int keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; t = (x0 & 0x0000FFFFUL) + (x1 << 16); x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); x0 = t; x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554); x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554); buf_put_le32(&outbuf[0], x0); buf_put_le32(&outbuf[4], x1); outbuf += 8; } return 0; } static const keccak_ops_t keccak_bmi2_32bi_ops = { .permute = keccak_f1600_state_permute32bi_bmi2, .absorb = keccak_absorb_lanes32bi_bmi2, .extract = keccak_extract32bi_bmi2, }; #endif /* USE_32BIT */ static void keccak_write (void *context, const void *inbuf_arg, size_t inlen) { KECCAK_CONTEXT *ctx = context; const size_t bsize = ctx->blocksize; const size_t blocklanes = bsize / 8; const byte *inbuf = inbuf_arg; unsigned int nburn, burn = 0; unsigned int count, i; unsigned int pos, nlanes; count = ctx->count; if (inlen && (count % 8)) { byte lane[8] = { 0, }; /* Complete absorbing partial input lane. */ pos = count / 8; for (i = count % 8; inlen && i < 8; i++) { lane[i] = *inbuf++; inlen--; count++; } if (count == bsize) count = 0; nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, (count % 8) ? -1 : blocklanes); burn = nburn > burn ? nburn : burn; } /* Absorb full input lanes. */ pos = count / 8; nlanes = inlen / 8; if (nlanes > 0) { nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes); burn = nburn > burn ? nburn : burn; inlen -= nlanes * 8; inbuf += nlanes * 8; count += nlanes * 8; count = count % bsize; } if (inlen) { byte lane[8] = { 0, }; /* Absorb remaining partial input lane. */ pos = count / 8; for (i = count % 8; inlen && i < 8; i++) { lane[i] = *inbuf++; inlen--; count++; } nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1); burn = nburn > burn ? 
nburn : burn; gcry_assert(count < bsize); } ctx->count = count; if (burn) _gcry_burn_stack (burn); } static void keccak_init (int algo, void *context, unsigned int flags) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; unsigned int features = _gcry_get_hw_features (); (void)flags; (void)features; memset (hd, 0, sizeof *hd); ctx->count = 0; /* Select generic implementation. */ #ifdef USE_64BIT ctx->ops = &keccak_generic64_ops; #elif defined USE_32BIT ctx->ops = &keccak_generic32bi_ops; #endif /* Select optimized implementation based in hw features. */ if (0) {} #ifdef USE_64BIT_ARM_NEON else if (features & HWF_ARM_NEON) ctx->ops = &keccak_armv7_neon_64_ops; #endif #ifdef USE_64BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_64_ops; #endif #ifdef USE_32BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_32bi_ops; #endif #ifdef USE_64BIT_SHLD else if (features & HWF_INTEL_FAST_SHLD) ctx->ops = &keccak_shld_64_ops; #endif /* Set input block size, in Keccak terms this is called 'rate'. */ switch (algo) { case GCRY_MD_SHA3_224: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1152 / 8; ctx->outlen = 224 / 8; break; case GCRY_MD_SHA3_256: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1088 / 8; ctx->outlen = 256 / 8; break; case GCRY_MD_SHA3_384: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 832 / 8; ctx->outlen = 384 / 8; break; case GCRY_MD_SHA3_512: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 576 / 8; ctx->outlen = 512 / 8; break; case GCRY_MD_SHAKE128: ctx->suffix = SHAKE_DELIMITED_SUFFIX; ctx->blocksize = 1344 / 8; ctx->outlen = 0; break; case GCRY_MD_SHAKE256: ctx->suffix = SHAKE_DELIMITED_SUFFIX; ctx->blocksize = 1088 / 8; ctx->outlen = 0; break; default: BUG(); } } static void sha3_224_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_224, context, flags); } static void sha3_256_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_256, context, flags); } static void sha3_384_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_384, context, flags); } static void sha3_512_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_512, context, flags); } static void shake128_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHAKE128, context, flags); } static void shake256_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHAKE256, context, flags); } /* The routine final terminates the computation and * returns the digest. * The handle is prepared for a new cycle, but adding bytes to the * handle will the destroy the returned buffer. * Returns: 64 bytes representing the digest. When used for sha384, * we take the leftmost 48 of those bytes. */ static void keccak_final (void *context) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; const size_t bsize = ctx->blocksize; const byte suffix = ctx->suffix; unsigned int nburn, burn = 0; unsigned int lastbytes; byte lane[8]; lastbytes = ctx->count; /* Do the padding and switch to the squeezing phase */ /* Absorb the last few bits and add the first bit of padding (which coincides with the delimiter in delimited suffix) */ buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8)); nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1); burn = nburn > burn ? nburn : burn; /* Add the second bit of padding. */ buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8)); nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1); burn = nburn > burn ? 
nburn : burn; if (suffix == SHA3_DELIMITED_SUFFIX) { /* Switch to the squeezing phase. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; /* Squeeze out the SHA3 digest. */ nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen); burn = nburn > burn ? nburn : burn; } else { /* Output for SHAKE can now be read with md_extract(). */ ctx->count = 0; } wipememory(lane, sizeof(lane)); if (burn) _gcry_burn_stack (burn); } static byte * keccak_read (void *context) { KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context; KECCAK_STATE *hd = &ctx->state; return (byte *)&hd->u; } static void keccak_extract (void *context, void *out, size_t outlen) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; const size_t bsize = ctx->blocksize; unsigned int nburn, burn = 0; byte *outbuf = out; unsigned int nlanes; unsigned int nleft; unsigned int count; unsigned int i; byte lane[8]; count = ctx->count; while (count && outlen && (outlen < 8 || count % 8)) { /* Extract partial lane. */ nburn = ctx->ops->extract(hd, count / 8, lane, 8); burn = nburn > burn ? nburn : burn; for (i = count % 8; outlen && i < 8; i++) { *outbuf++ = lane[i]; outlen--; count++; } gcry_assert(count <= bsize); if (count == bsize) count = 0; } if (outlen >= 8 && count) { /* Extract tail of partial block. */ nlanes = outlen / 8; nleft = (bsize - count) / 8; nlanes = nlanes < nleft ? nlanes : nleft; nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); burn = nburn > burn ? nburn : burn; outlen -= nlanes * 8; outbuf += nlanes * 8; count += nlanes * 8; gcry_assert(count <= bsize); if (count == bsize) count = 0; } while (outlen >= bsize) { gcry_assert(count == 0); /* Squeeze more. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; /* Extract full block. */ nburn = ctx->ops->extract(hd, 0, outbuf, bsize); burn = nburn > burn ? nburn : burn; outlen -= bsize; outbuf += bsize; } if (outlen) { gcry_assert(outlen < bsize); if (count == 0) { /* Squeeze more. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; } if (outlen >= 8) { /* Extract head of partial block. */ nlanes = outlen / 8; nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); burn = nburn > burn ? nburn : burn; outlen -= nlanes * 8; outbuf += nlanes * 8; count += nlanes * 8; gcry_assert(count < bsize); } if (outlen) { /* Extract head of partial lane. */ nburn = ctx->ops->extract(hd, count / 8, lane, 8); burn = nburn > burn ? nburn : burn; for (i = count % 8; outlen && i < 8; i++) { *outbuf++ = lane[i]; outlen--; count++; } gcry_assert(count < bsize); } } ctx->count = count; if (burn) _gcry_burn_stack (burn); } +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 'spec->mdlen' bytes. */ +static void +_gcry_sha3_hash_buffer (void *outbuf, const void *buffer, size_t length, + const gcry_md_spec_t *spec) +{ + KECCAK_CONTEXT hd; + + spec->init (&hd, 0); + keccak_write (&hd, buffer, length); + keccak_final (&hd); + memcpy (outbuf, keccak_read (&hd), spec->mdlen); +} + + +/* Variant of the above shortcut function using multiple buffers. 
 */
+static void
+_gcry_sha3_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt,
+                         const gcry_md_spec_t *spec)
+{
+  KECCAK_CONTEXT hd;
+
+  spec->init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    keccak_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len);
+  keccak_final (&hd);
+  memcpy (outbuf, keccak_read (&hd), spec->mdlen);
+}
+
+
+static void
+_gcry_sha3_224_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+  _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_224);
+}
+
+static void
+_gcry_sha3_256_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+  _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_256);
+}
+
+static void
+_gcry_sha3_384_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+  _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_384);
+}
+
+static void
+_gcry_sha3_512_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+  _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_512);
+}
+
+static void
+_gcry_sha3_224_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+                             int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_224);
+}
+
+static void
+_gcry_sha3_256_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+                             int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_256);
+}
+
+static void
+_gcry_sha3_384_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+                             int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_384);
+}
+
+static void
+_gcry_sha3_512_hash_buffers (void *outbuf, const gcry_buffer_t *iov,
+                             int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_512);
+}
+

/* Self-test section.
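[Not part of the patch, a caller-side sketch: the single-buffer shortcut wrappers just added are what the public gcry_md_hash_buffer helper can use for SHA-3; the expected digest for "abc" matches the SHA3-256 short_hash vector in the self-test section below.]

/* One-shot SHA3-256 of "abc" -- illustrative only.  Expected:
 * 3a985da74fe225b2045c172d6bd390bd855f086e3e9d525b46bfe24511431532 */
#include <stdio.h>
#include <gcrypt.h>

int
main (void)
{
  unsigned char digest[32];
  int i;

  gcry_check_version (NULL);
  gcry_md_hash_buffer (GCRY_MD_SHA3_256, digest, "abc", 3);
  for (i = 0; i < 32; i++)
    printf ("%02x", digest[i]);
  printf ("\n");
  return 0;
}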
*/ static gpg_err_code_t selftests_keccak (int algo, int extended, selftest_report_func_t report) { const char *what; const char *errtxt; const char *short_hash; const char *long_hash; const char *one_million_a_hash; int hash_len; switch (algo) { default: BUG(); case GCRY_MD_SHA3_224: short_hash = "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f" "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf"; long_hash = "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5" "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc"; one_million_a_hash = "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c" "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c"; hash_len = 28; break; case GCRY_MD_SHA3_256: short_hash = "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd" "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32"; long_hash = "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb" "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18"; one_million_a_hash = "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6" "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1"; hash_len = 32; break; case GCRY_MD_SHA3_384: short_hash = "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d" "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2" "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25"; long_hash = "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91" "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc" "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7"; one_million_a_hash = "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad" "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84" "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40"; hash_len = 48; break; case GCRY_MD_SHA3_512: short_hash = "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e" "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e" "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40" "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0"; long_hash = "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9" "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa" "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89" "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85"; one_million_a_hash = "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e" "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59" "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66" "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87"; hash_len = 64; break; case GCRY_MD_SHAKE128: short_hash = "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7" "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8"; long_hash = "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b" "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b"; one_million_a_hash = "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58"; hash_len = 32; break; case GCRY_MD_SHAKE256: short_hash = "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d" "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39"; long_hash = "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9" 
"\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45"; one_million_a_hash = "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a"; hash_len = 32; break; } what = "short string"; errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash, hash_len); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (algo, 0, "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, long_hash, hash_len); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0, one_million_a_hash, hash_len); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", algo, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. */ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_MD_SHA3_224: case GCRY_MD_SHA3_256: case GCRY_MD_SHA3_384: case GCRY_MD_SHA3_512: case GCRY_MD_SHAKE128: case GCRY_MD_SHAKE256: ec = selftests_keccak (algo, extended, report); break; default: ec = GPG_ERR_DIGEST_ALGO; break; } return ec; } static byte sha3_224_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_sha3_224[] = { { "2.16.840.1.101.3.4.2.7" }, /* PKCS#1 sha3_224WithRSAEncryption */ { "?" }, { NULL } }; static byte sha3_256_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_sha3_256[] = { { "2.16.840.1.101.3.4.2.8" }, /* PKCS#1 sha3_256WithRSAEncryption */ { "?" }, { NULL } }; static byte sha3_384_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_sha3_384[] = { { "2.16.840.1.101.3.4.2.9" }, /* PKCS#1 sha3_384WithRSAEncryption */ { "?" }, { NULL } }; static byte sha3_512_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_sha3_512[] = { { "2.16.840.1.101.3.4.2.10" }, /* PKCS#1 sha3_512WithRSAEncryption */ { "?" }, { NULL } }; static byte shake128_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_shake128[] = { { "2.16.840.1.101.3.4.2.11" }, /* PKCS#1 shake128WithRSAEncryption */ { "?" }, { NULL } }; static byte shake256_asn[] = { 0x30 }; static gcry_md_oid_spec_t oid_spec_shake256[] = { { "2.16.840.1.101.3.4.2.12" }, /* PKCS#1 shake256WithRSAEncryption */ { "?" 
}, { NULL } }; gcry_md_spec_t _gcry_digest_spec_sha3_224 = { GCRY_MD_SHA3_224, {0, 1}, "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28, sha3_224_init, keccak_write, keccak_final, keccak_read, NULL, - NULL, NULL, + _gcry_sha3_224_hash_buffer, _gcry_sha3_224_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_sha3_256 = { GCRY_MD_SHA3_256, {0, 1}, "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32, sha3_256_init, keccak_write, keccak_final, keccak_read, NULL, - NULL, NULL, + _gcry_sha3_256_hash_buffer, _gcry_sha3_256_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_sha3_384 = { GCRY_MD_SHA3_384, {0, 1}, "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48, sha3_384_init, keccak_write, keccak_final, keccak_read, NULL, - NULL, NULL, + _gcry_sha3_384_hash_buffer, _gcry_sha3_384_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_sha3_512 = { GCRY_MD_SHA3_512, {0, 1}, "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64, sha3_512_init, keccak_write, keccak_final, keccak_read, NULL, - NULL, NULL, + _gcry_sha3_512_hash_buffer, _gcry_sha3_512_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_shake128 = { GCRY_MD_SHAKE128, {0, 1}, "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0, shake128_init, keccak_write, keccak_final, NULL, keccak_extract, NULL, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_shake256 = { GCRY_MD_SHAKE256, {0, 1}, "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0, shake256_init, keccak_write, keccak_final, NULL, keccak_extract, NULL, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; diff --git a/cipher/sha256.c b/cipher/sha256.c index 5c1c13f8..06959707 100644 --- a/cipher/sha256.c +++ b/cipher/sha256.c @@ -1,759 +1,788 @@ /* sha256.c - SHA256 hash function * Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ /* Test vectors: "abc" SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7 SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525 SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1 "a" one million times SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67 SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0 */ #include #include #include #include #include "g10lib.h" #include "bithelp.h" #include "bufhelp.h" #include "cipher.h" #include "hash-common.h" /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. 
*/ #undef USE_SSSE3 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_AVX indicates whether to compile with Intel AVX code. */ #undef USE_AVX #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX 1 #endif /* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */ #undef USE_AVX2 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX2 1 #endif /* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */ #undef USE_SHAEXT #if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \ defined(HAVE_GCC_INLINE_ASM_SSE41) && \ defined(ENABLE_SHAEXT_SUPPORT) # define USE_SHAEXT 1 #endif /* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly * code. */ #undef USE_ARM_CE #ifdef ENABLE_ARM_CRYPTO_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) # define USE_ARM_CE 1 # elif defined(__AARCH64EL__) \ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) # define USE_ARM_CE 1 # endif #endif typedef struct { gcry_md_block_ctx_t bctx; u32 h0,h1,h2,h3,h4,h5,h6,h7; #ifdef USE_SSSE3 unsigned int use_ssse3:1; #endif #ifdef USE_AVX unsigned int use_avx:1; #endif #ifdef USE_AVX2 unsigned int use_avx2:1; #endif #ifdef USE_SHAEXT unsigned int use_shaext:1; #endif #ifdef USE_ARM_CE unsigned int use_arm_ce:1; #endif } SHA256_CONTEXT; static unsigned int transform (void *c, const unsigned char *data, size_t nblks); static void sha256_init (void *context, unsigned int flags) { SHA256_CONTEXT *hd = context; unsigned int features = _gcry_get_hw_features (); (void)flags; hd->h0 = 0x6a09e667; hd->h1 = 0xbb67ae85; hd->h2 = 0x3c6ef372; hd->h3 = 0xa54ff53a; hd->h4 = 0x510e527f; hd->h5 = 0x9b05688c; hd->h6 = 0x1f83d9ab; hd->h7 = 0x5be0cd19; hd->bctx.nblocks = 0; hd->bctx.nblocks_high = 0; hd->bctx.count = 0; hd->bctx.blocksize = 64; hd->bctx.bwrite = transform; #ifdef USE_SSSE3 hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; #endif #ifdef USE_AVX /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. * Therefore use this implementation on Intel CPUs only. 
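 * (HWF_INTEL_FAST_SHLD is reported only for CPUs with a fast SHLD, so the
 * feature check below restricts the AVX code path accordingly.)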
*/ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); #endif #ifdef USE_AVX2 hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); #endif #ifdef USE_SHAEXT hd->use_shaext = (features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1); #endif #ifdef USE_ARM_CE hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0; #endif (void)features; } static void sha224_init (void *context, unsigned int flags) { SHA256_CONTEXT *hd = context; unsigned int features = _gcry_get_hw_features (); (void)flags; hd->h0 = 0xc1059ed8; hd->h1 = 0x367cd507; hd->h2 = 0x3070dd17; hd->h3 = 0xf70e5939; hd->h4 = 0xffc00b31; hd->h5 = 0x68581511; hd->h6 = 0x64f98fa7; hd->h7 = 0xbefa4fa4; hd->bctx.nblocks = 0; hd->bctx.nblocks_high = 0; hd->bctx.count = 0; hd->bctx.blocksize = 64; hd->bctx.bwrite = transform; #ifdef USE_SSSE3 hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; #endif #ifdef USE_AVX /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. * Therefore use this implementation on Intel CPUs only. */ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); #endif #ifdef USE_AVX2 hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); #endif #ifdef USE_SHAEXT hd->use_shaext = (features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1); #endif #ifdef USE_ARM_CE hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0; #endif (void)features; } /* Transform the message X which consists of 16 32-bit-words. See FIPS 180-2 for details. */ #define R(a,b,c,d,e,f,g,h,k,w) do \ { \ t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \ t2 = Sum0((a)) + Maj((a),(b),(c)); \ d += t1; \ h = t1 + t2; \ } while (0) /* (4.2) same as SHA-1's F1. */ #define Cho(x, y, z) (z ^ (x & (y ^ z))) /* (4.3) same as SHA-1's F3 */ #define Maj(x, y, z) ((x & y) + (z & (x ^ y))) /* (4.4) */ #define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22)) /* (4.5) */ #define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25)) /* Message expansion */ #define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */ #define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */ #define I(i) ( w[i] = buf_get_be32(data + i * 4) ) #define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \ + w[(i-7) &0x0f] \ + S0(w[(i-15)&0x0f]) \ + w[(i-16)&0x0f] ) static unsigned int transform_blk (void *ctx, const unsigned char *data) { SHA256_CONTEXT *hd = ctx; static const u32 K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; u32 a,b,c,d,e,f,g,h,t1,t2; u32 w[16]; a = hd->h0; b = hd->h1; c = hd->h2; d = hd->h3; e = hd->h4; f = hd->h5; g = hd->h6; h = hd->h7; R(a, b, c, d, e, f, g, h, K[0], I(0)); R(h, a, b, c, d, e, f, g, K[1], I(1)); R(g, h, a, b, c, d, e, f, K[2], I(2)); R(f, g, h, a, b, c, d, e, K[3], I(3)); R(e, f, g, h, a, b, c, d, K[4], I(4)); R(d, e, f, 
g, h, a, b, c, K[5], I(5)); R(c, d, e, f, g, h, a, b, K[6], I(6)); R(b, c, d, e, f, g, h, a, K[7], I(7)); R(a, b, c, d, e, f, g, h, K[8], I(8)); R(h, a, b, c, d, e, f, g, K[9], I(9)); R(g, h, a, b, c, d, e, f, K[10], I(10)); R(f, g, h, a, b, c, d, e, K[11], I(11)); R(e, f, g, h, a, b, c, d, K[12], I(12)); R(d, e, f, g, h, a, b, c, K[13], I(13)); R(c, d, e, f, g, h, a, b, K[14], I(14)); R(b, c, d, e, f, g, h, a, K[15], I(15)); R(a, b, c, d, e, f, g, h, K[16], W(16)); R(h, a, b, c, d, e, f, g, K[17], W(17)); R(g, h, a, b, c, d, e, f, K[18], W(18)); R(f, g, h, a, b, c, d, e, K[19], W(19)); R(e, f, g, h, a, b, c, d, K[20], W(20)); R(d, e, f, g, h, a, b, c, K[21], W(21)); R(c, d, e, f, g, h, a, b, K[22], W(22)); R(b, c, d, e, f, g, h, a, K[23], W(23)); R(a, b, c, d, e, f, g, h, K[24], W(24)); R(h, a, b, c, d, e, f, g, K[25], W(25)); R(g, h, a, b, c, d, e, f, K[26], W(26)); R(f, g, h, a, b, c, d, e, K[27], W(27)); R(e, f, g, h, a, b, c, d, K[28], W(28)); R(d, e, f, g, h, a, b, c, K[29], W(29)); R(c, d, e, f, g, h, a, b, K[30], W(30)); R(b, c, d, e, f, g, h, a, K[31], W(31)); R(a, b, c, d, e, f, g, h, K[32], W(32)); R(h, a, b, c, d, e, f, g, K[33], W(33)); R(g, h, a, b, c, d, e, f, K[34], W(34)); R(f, g, h, a, b, c, d, e, K[35], W(35)); R(e, f, g, h, a, b, c, d, K[36], W(36)); R(d, e, f, g, h, a, b, c, K[37], W(37)); R(c, d, e, f, g, h, a, b, K[38], W(38)); R(b, c, d, e, f, g, h, a, K[39], W(39)); R(a, b, c, d, e, f, g, h, K[40], W(40)); R(h, a, b, c, d, e, f, g, K[41], W(41)); R(g, h, a, b, c, d, e, f, K[42], W(42)); R(f, g, h, a, b, c, d, e, K[43], W(43)); R(e, f, g, h, a, b, c, d, K[44], W(44)); R(d, e, f, g, h, a, b, c, K[45], W(45)); R(c, d, e, f, g, h, a, b, K[46], W(46)); R(b, c, d, e, f, g, h, a, K[47], W(47)); R(a, b, c, d, e, f, g, h, K[48], W(48)); R(h, a, b, c, d, e, f, g, K[49], W(49)); R(g, h, a, b, c, d, e, f, K[50], W(50)); R(f, g, h, a, b, c, d, e, K[51], W(51)); R(e, f, g, h, a, b, c, d, K[52], W(52)); R(d, e, f, g, h, a, b, c, K[53], W(53)); R(c, d, e, f, g, h, a, b, K[54], W(54)); R(b, c, d, e, f, g, h, a, K[55], W(55)); R(a, b, c, d, e, f, g, h, K[56], W(56)); R(h, a, b, c, d, e, f, g, K[57], W(57)); R(g, h, a, b, c, d, e, f, K[58], W(58)); R(f, g, h, a, b, c, d, e, K[59], W(59)); R(e, f, g, h, a, b, c, d, K[60], W(60)); R(d, e, f, g, h, a, b, c, K[61], W(61)); R(c, d, e, f, g, h, a, b, K[62], W(62)); R(b, c, d, e, f, g, h, a, K[63], W(63)); hd->h0 += a; hd->h1 += b; hd->h2 += c; hd->h3 += d; hd->h4 += e; hd->h5 += f; hd->h6 += g; hd->h7 += h; return /*burn_stack*/ 26*4+32; } #undef S0 #undef S1 #undef R /* Assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. 
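 * (The sysv_abi declarations below make the Win64 caller save the otherwise
 * callee-saved registers XMM6-XMM15 on its stack around the assembly calls;
 * ASM_EXTRA_STACK, 10 * 16 = 160 bytes, is added to the burn-stack amounts
 * so that this spill area is wiped as well.)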
*/ #undef ASM_FUNC_ABI #undef ASM_EXTRA_STACK #if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \ defined(USE_SHAEXT) # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # define ASM_EXTRA_STACK (10 * 16) # else # define ASM_FUNC_ABI # define ASM_EXTRA_STACK 0 # endif #endif #ifdef USE_SSSE3 unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data, u32 state[8], size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data, u32 state[8], size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX2 unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data, u32 state[8], size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_SHAEXT /* Does not need ASM_FUNC_ABI */ unsigned int _gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *input_data, size_t num_blks); #endif #ifdef USE_ARM_CE unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8], const void *input_data, size_t num_blks); #endif static unsigned int transform (void *ctx, const unsigned char *data, size_t nblks) { SHA256_CONTEXT *hd = ctx; unsigned int burn; #ifdef USE_SHAEXT if (hd->use_shaext) { burn = _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks); burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0; return burn; } #endif #ifdef USE_AVX2 if (hd->use_avx2) { burn = _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks); burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0; return burn; } #endif #ifdef USE_AVX if (hd->use_avx) { burn = _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks); burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0; return burn; } #endif #ifdef USE_SSSE3 if (hd->use_ssse3) { burn = _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks); burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0; return burn; } #endif #ifdef USE_ARM_CE if (hd->use_arm_ce) { burn = _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks); burn += burn ? 4 * sizeof(void*) : 0; return burn; } #endif do { burn = transform_blk (hd, data); data += 64; } while (--nblks); #ifdef ASM_EXTRA_STACK /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to * here too. */ burn += ASM_EXTRA_STACK; #endif return burn; } /* The routine finally terminates the computation and returns the digest. The handle is prepared for a new cycle, but adding bytes to the handle will the destroy the returned buffer. Returns: 32 bytes with the message the digest. 
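   As a concrete illustration of the padding done here: for the 3-byte
   message "abc" (count = 3, no full blocks yet) the total length is 24
   bits, so 0x80 is stored at offset 3, offsets 4..55 are zeroed, and the
   64-bit big-endian bit count 24 is written as two 32-bit words at
   offsets 56 and 60 before the final call to transform ().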
*/ static void sha256_final(void *context) { SHA256_CONTEXT *hd = context; u32 t, th, msb, lsb; byte *p; unsigned int burn; _gcry_md_block_write (hd, NULL, 0); /* flush */; t = hd->bctx.nblocks; if (sizeof t == sizeof hd->bctx.nblocks) th = hd->bctx.nblocks_high; else th = hd->bctx.nblocks >> 32; /* multiply by 64 to make a byte count */ lsb = t << 6; msb = (th << 6) | (t >> 26); /* add the count */ t = lsb; if ((lsb += hd->bctx.count) < t) msb++; /* multiply by 8 to make a bit count */ t = lsb; lsb <<= 3; msb <<= 3; msb |= t >> 29; if (hd->bctx.count < 56) { /* enough room */ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ while (hd->bctx.count < 56) hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ } else { /* need one extra block */ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ while (hd->bctx.count < 64) hd->bctx.buf[hd->bctx.count++] = 0; _gcry_md_block_write (hd, NULL, 0); /* flush */; memset (hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */ } /* append the 64 bit count */ buf_put_be32(hd->bctx.buf + 56, msb); buf_put_be32(hd->bctx.buf + 60, lsb); burn = transform (hd, hd->bctx.buf, 1); _gcry_burn_stack (burn); p = hd->bctx.buf; #define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0) X(0); X(1); X(2); X(3); X(4); X(5); X(6); X(7); #undef X } static byte * sha256_read (void *context) { SHA256_CONTEXT *hd = context; return hd->bctx.buf; } /* Shortcut functions which puts the hash value of the supplied buffer * into outbuf which must have a size of 32 bytes. */ void _gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length) { SHA256_CONTEXT hd; sha256_init (&hd, 0); _gcry_md_block_write (&hd, buffer, length); sha256_final (&hd); memcpy (outbuf, hd.bctx.buf, 32); } /* Variant of the above shortcut function using multiple buffers. */ void _gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) { SHA256_CONTEXT hd; sha256_init (&hd, 0); for (;iovcnt > 0; iov++, iovcnt--) _gcry_md_block_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len); sha256_final (&hd); memcpy (outbuf, hd.bctx.buf, 32); } +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 28 bytes. */ +static void +_gcry_sha224_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + SHA256_CONTEXT hd; + + sha224_init (&hd, 0); + _gcry_md_block_write (&hd, buffer, length); + sha256_final (&hd); + memcpy (outbuf, hd.bctx.buf, 28); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +static void +_gcry_sha224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) +{ + SHA256_CONTEXT hd; + + sha224_init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + _gcry_md_block_write (&hd, + (const char*)iov[0].data + iov[0].off, iov[0].len); + sha256_final (&hd); + memcpy (outbuf, hd.bctx.buf, 28); +} + + /* Self-test section. 
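   A minimal sketch of how the sha224 shortcut added above may be used;
   "digest" is a hypothetical local buffer, not part of this patch:

       byte digest[28];
       _gcry_sha224_hash_buffer (digest, "abc", 3);

   The result is the SHA224 "abc" test vector listed at the top of this
   file and checked by selftests_sha224 below.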
*/ static gpg_err_code_t selftests_sha224 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "short string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA224, 0, "abc", 3, "\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3" "\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA224, 0, "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, "\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50" "\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA224, 1, NULL, 0, "\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b" "\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", GCRY_MD_SHA224, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } static gpg_err_code_t selftests_sha256 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "short string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA256, 0, "abc", 3, "\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23" "\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA256, 0, "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, "\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39" "\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1", 32); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA256, 1, NULL, 0, "\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67" "\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0", 32); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", GCRY_MD_SHA256, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. 
*/ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_MD_SHA224: ec = selftests_sha224 (extended, report); break; case GCRY_MD_SHA256: ec = selftests_sha256 (extended, report); break; default: ec = GPG_ERR_DIGEST_ALGO; break; } return ec; } static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */ { 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04, 0x1C }; static gcry_md_oid_spec_t oid_spec_sha224[] = { /* From RFC3874, Section 4 */ { "2.16.840.1.101.3.4.2.4" }, { NULL }, }; static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */ { 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05, 0x00, 0x04, 0x20 }; static gcry_md_oid_spec_t oid_spec_sha256[] = { /* According to the OpenPGP draft rfc2440-bis06 */ { "2.16.840.1.101.3.4.2.1" }, /* PKCS#1 sha256WithRSAEncryption */ { "1.2.840.113549.1.1.11" }, { NULL }, }; gcry_md_spec_t _gcry_digest_spec_sha224 = { GCRY_MD_SHA224, {0, 1}, "SHA224", asn224, DIM (asn224), oid_spec_sha224, 28, sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, - NULL, NULL, + _gcry_sha224_hash_buffer, _gcry_sha224_hash_buffers, sizeof (SHA256_CONTEXT), run_selftests }; gcry_md_spec_t _gcry_digest_spec_sha256 = { GCRY_MD_SHA256, {0, 1}, "SHA256", asn256, DIM (asn256), oid_spec_sha256, 32, sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, _gcry_sha256_hash_buffer, _gcry_sha256_hash_buffers, sizeof (SHA256_CONTEXT), run_selftests }; diff --git a/cipher/sha512.c b/cipher/sha512.c index e83e84b8..9405de80 100644 --- a/cipher/sha512.c +++ b/cipher/sha512.c @@ -1,961 +1,991 @@ /* sha512.c - SHA384 and SHA512 hash functions * Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ /* Test vectors from FIPS-180-2: * * "abc" * 384: * CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163 * 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7 * 512: * DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A * 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F * * "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" * 384: * 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2 * 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039 * 512: * 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018 * 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909 * * "a" x 1000000 * 384: * 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852 * 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985 * 512: * E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB * DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B */ #include #include #include "g10lib.h" #include "bithelp.h" #include "bufhelp.h" #include "cipher.h" #include "hash-common.h" /* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */ #undef USE_ARM_NEON_ASM #ifdef ENABLE_NEON_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_NEON) # define USE_ARM_NEON_ASM 1 # endif #endif /*ENABLE_NEON_SUPPORT*/ /* USE_ARM_ASM indicates whether to enable ARM assembly code. */ #undef USE_ARM_ASM #if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) # define USE_ARM_ASM 1 #endif /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ #undef USE_SSSE3 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_AVX indicates whether to compile with Intel AVX code. */ #undef USE_AVX #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX 1 #endif /* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. 
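   (RORX is a BMI2 instruction, hence the additional
   HAVE_GCC_INLINE_ASM_BMI2 requirement in the check below.)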
*/ #undef USE_AVX2 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX2 1 #endif typedef struct { u64 h0, h1, h2, h3, h4, h5, h6, h7; } SHA512_STATE; typedef struct { gcry_md_block_ctx_t bctx; SHA512_STATE state; #ifdef USE_ARM_NEON_ASM unsigned int use_neon:1; #endif #ifdef USE_SSSE3 unsigned int use_ssse3:1; #endif #ifdef USE_AVX unsigned int use_avx:1; #endif #ifdef USE_AVX2 unsigned int use_avx2:1; #endif } SHA512_CONTEXT; static unsigned int transform (void *context, const unsigned char *data, size_t nblks); static void sha512_init (void *context, unsigned int flags) { SHA512_CONTEXT *ctx = context; SHA512_STATE *hd = &ctx->state; unsigned int features = _gcry_get_hw_features (); (void)flags; hd->h0 = U64_C(0x6a09e667f3bcc908); hd->h1 = U64_C(0xbb67ae8584caa73b); hd->h2 = U64_C(0x3c6ef372fe94f82b); hd->h3 = U64_C(0xa54ff53a5f1d36f1); hd->h4 = U64_C(0x510e527fade682d1); hd->h5 = U64_C(0x9b05688c2b3e6c1f); hd->h6 = U64_C(0x1f83d9abfb41bd6b); hd->h7 = U64_C(0x5be0cd19137e2179); ctx->bctx.nblocks = 0; ctx->bctx.nblocks_high = 0; ctx->bctx.count = 0; ctx->bctx.blocksize = 128; ctx->bctx.bwrite = transform; #ifdef USE_ARM_NEON_ASM ctx->use_neon = (features & HWF_ARM_NEON) != 0; #endif #ifdef USE_SSSE3 ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; #endif #ifdef USE_AVX ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); #endif #ifdef USE_AVX2 ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); #endif (void)features; } static void sha384_init (void *context, unsigned int flags) { SHA512_CONTEXT *ctx = context; SHA512_STATE *hd = &ctx->state; unsigned int features = _gcry_get_hw_features (); (void)flags; hd->h0 = U64_C(0xcbbb9d5dc1059ed8); hd->h1 = U64_C(0x629a292a367cd507); hd->h2 = U64_C(0x9159015a3070dd17); hd->h3 = U64_C(0x152fecd8f70e5939); hd->h4 = U64_C(0x67332667ffc00b31); hd->h5 = U64_C(0x8eb44a8768581511); hd->h6 = U64_C(0xdb0c2e0d64f98fa7); hd->h7 = U64_C(0x47b5481dbefa4fa4); ctx->bctx.nblocks = 0; ctx->bctx.nblocks_high = 0; ctx->bctx.count = 0; ctx->bctx.blocksize = 128; ctx->bctx.bwrite = transform; #ifdef USE_ARM_NEON_ASM ctx->use_neon = (features & HWF_ARM_NEON) != 0; #endif #ifdef USE_SSSE3 ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; #endif #ifdef USE_AVX ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD); #endif #ifdef USE_AVX2 ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2); #endif (void)features; } static const u64 k[] = { U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd), U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc), U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019), U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118), U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe), U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2), U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1), U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694), U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3), U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65), U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483), U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5), U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210), U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4), U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725), U64_C(0x06ca6351e003826f), 
U64_C(0x142929670a0e6e70), U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926), U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df), U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8), U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b), U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001), U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30), U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910), U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8), U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53), U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8), U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb), U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3), U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60), U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec), U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9), U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b), U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207), U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178), U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6), U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b), U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493), U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c), U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a), U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817) }; #ifndef USE_ARM_ASM static inline u64 ROTR (u64 x, u64 n) { return ((x >> n) | (x << (64 - n))); } static inline u64 Ch (u64 x, u64 y, u64 z) { return ((x & y) ^ ( ~x & z)); } static inline u64 Maj (u64 x, u64 y, u64 z) { return ((x & y) ^ (x & z) ^ (y & z)); } static inline u64 Sum0 (u64 x) { return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39)); } static inline u64 Sum1 (u64 x) { return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41)); } /**************** * Transform the message W which consists of 16 64-bit-words */ static unsigned int transform_blk (SHA512_STATE *hd, const unsigned char *data) { u64 a, b, c, d, e, f, g, h; u64 w[16]; int t; /* get values from the chaining vars */ a = hd->h0; b = hd->h1; c = hd->h2; d = hd->h3; e = hd->h4; f = hd->h5; g = hd->h6; h = hd->h7; for ( t = 0; t < 16; t++ ) w[t] = buf_get_be64(data + t * 8); #define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) #define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) for (t = 0; t < 80 - 16; ) { u64 t1, t2; /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes initialized to 0,1,2,3...255,0,... and 1000 iterations: Not unrolled with macros: 440ms Unrolled with macros: 350ms Unrolled with inline: 330ms */ #if 0 /* Not unrolled. */ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]); t2 = Sum0 (a) + Maj (a, b, c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; t++; #else /* Unrolled to interweave the chain variables. 
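      Each of the 16 rounds below renames a..h by one position instead of
      shifting them (h = g; g = f; ...), so only two chaining variables are
      written per round and w[] is updated in place as a 16-word circular
      buffer.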
*/ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; w[0] += S1 (w[14]) + w[9] + S0 (w[1]); t2 = Sum0 (a) + Maj (a, b, c); d += t1; h = t1 + t2; t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; w[1] += S1 (w[15]) + w[10] + S0 (w[2]); t2 = Sum0 (h) + Maj (h, a, b); c += t1; g = t1 + t2; t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; w[2] += S1 (w[0]) + w[11] + S0 (w[3]); t2 = Sum0 (g) + Maj (g, h, a); b += t1; f = t1 + t2; t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; w[3] += S1 (w[1]) + w[12] + S0 (w[4]); t2 = Sum0 (f) + Maj (f, g, h); a += t1; e = t1 + t2; t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; w[4] += S1 (w[2]) + w[13] + S0 (w[5]); t2 = Sum0 (e) + Maj (e, f, g); h += t1; d = t1 + t2; t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; w[5] += S1 (w[3]) + w[14] + S0 (w[6]); t2 = Sum0 (d) + Maj (d, e, f); g += t1; c = t1 + t2; t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; w[6] += S1 (w[4]) + w[15] + S0 (w[7]); t2 = Sum0 (c) + Maj (c, d, e); f += t1; b = t1 + t2; t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; w[7] += S1 (w[5]) + w[0] + S0 (w[8]); t2 = Sum0 (b) + Maj (b, c, d); e += t1; a = t1 + t2; t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; w[8] += S1 (w[6]) + w[1] + S0 (w[9]); t2 = Sum0 (a) + Maj (a, b, c); d += t1; h = t1 + t2; t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; w[9] += S1 (w[7]) + w[2] + S0 (w[10]); t2 = Sum0 (h) + Maj (h, a, b); c += t1; g = t1 + t2; t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; w[10] += S1 (w[8]) + w[3] + S0 (w[11]); t2 = Sum0 (g) + Maj (g, h, a); b += t1; f = t1 + t2; t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; w[11] += S1 (w[9]) + w[4] + S0 (w[12]); t2 = Sum0 (f) + Maj (f, g, h); a += t1; e = t1 + t2; t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; w[12] += S1 (w[10]) + w[5] + S0 (w[13]); t2 = Sum0 (e) + Maj (e, f, g); h += t1; d = t1 + t2; t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; w[13] += S1 (w[11]) + w[6] + S0 (w[14]); t2 = Sum0 (d) + Maj (d, e, f); g += t1; c = t1 + t2; t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; w[14] += S1 (w[12]) + w[7] + S0 (w[15]); t2 = Sum0 (c) + Maj (c, d, e); f += t1; b = t1 + t2; t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; w[15] += S1 (w[13]) + w[8] + S0 (w[0]); t2 = Sum0 (b) + Maj (b, c, d); e += t1; a = t1 + t2; t += 16; #endif } for (; t < 80; ) { u64 t1, t2; #if 0 /* Not unrolled. */ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; t2 = Sum0 (a) + Maj (a, b, c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; t++; #else /* Unrolled to interweave the chain variables. 
*/ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; t2 = Sum0 (a) + Maj (a, b, c); d += t1; h = t1 + t2; t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; t2 = Sum0 (h) + Maj (h, a, b); c += t1; g = t1 + t2; t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; t2 = Sum0 (g) + Maj (g, h, a); b += t1; f = t1 + t2; t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; t2 = Sum0 (f) + Maj (f, g, h); a += t1; e = t1 + t2; t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; t2 = Sum0 (e) + Maj (e, f, g); h += t1; d = t1 + t2; t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; t2 = Sum0 (d) + Maj (d, e, f); g += t1; c = t1 + t2; t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; t2 = Sum0 (c) + Maj (c, d, e); f += t1; b = t1 + t2; t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; t2 = Sum0 (b) + Maj (b, c, d); e += t1; a = t1 + t2; t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; t2 = Sum0 (a) + Maj (a, b, c); d += t1; h = t1 + t2; t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; t2 = Sum0 (h) + Maj (h, a, b); c += t1; g = t1 + t2; t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; t2 = Sum0 (g) + Maj (g, h, a); b += t1; f = t1 + t2; t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; t2 = Sum0 (f) + Maj (f, g, h); a += t1; e = t1 + t2; t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; t2 = Sum0 (e) + Maj (e, f, g); h += t1; d = t1 + t2; t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; t2 = Sum0 (d) + Maj (d, e, f); g += t1; c = t1 + t2; t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; t2 = Sum0 (c) + Maj (c, d, e); f += t1; b = t1 + t2; t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; t2 = Sum0 (b) + Maj (b, c, d); e += t1; a = t1 + t2; t += 16; #endif } /* Update chaining vars. */ hd->h0 += a; hd->h1 += b; hd->h2 += c; hd->h3 += d; hd->h4 += e; hd->h5 += f; hd->h6 += g; hd->h7 += h; return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*); } #endif /*!USE_ARM_ASM*/ /* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. 
*/ #undef ASM_FUNC_ABI #undef ASM_EXTRA_STACK #if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # define ASM_EXTRA_STACK (10 * 16) # else # define ASM_FUNC_ABI # define ASM_EXTRA_STACK 0 # endif #endif #ifdef USE_ARM_NEON_ASM void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd, const unsigned char *data, const u64 k[], size_t num_blks); #endif #ifdef USE_ARM_ASM unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd, const unsigned char *data, const u64 k[], size_t num_blks); #endif #ifdef USE_SSSE3 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data, void *state, size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data, void *state, size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX2 unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data, void *state, size_t num_blks) ASM_FUNC_ABI; #endif static unsigned int transform (void *context, const unsigned char *data, size_t nblks) { SHA512_CONTEXT *ctx = context; unsigned int burn; #ifdef USE_AVX2 if (ctx->use_avx2) return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks) + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_AVX if (ctx->use_avx) return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks) + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_SSSE3 if (ctx->use_ssse3) return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks) + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_ARM_NEON_ASM if (ctx->use_neon) { _gcry_sha512_transform_armv7_neon (&ctx->state, data, k, nblks); /* _gcry_sha512_transform_armv7_neon does not store sensitive data * to stack. */ return /* no burn_stack */ 0; } #endif #ifdef USE_ARM_ASM burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks); #else do { burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*); data += 128; } while (--nblks); #ifdef ASM_EXTRA_STACK /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to * here too. */ burn += ASM_EXTRA_STACK; #endif #endif return burn; } /* The routine final terminates the computation and * returns the digest. * The handle is prepared for a new cycle, but adding bytes to the * handle will the destroy the returned buffer. * Returns: 64 bytes representing the digest. When used for sha384, * we take the leftmost 48 of those bytes. 
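 * For SHA384 the context is simply set up with sha384_init and the first
 * 48 bytes of the buffer produced here are copied out, as done by the
 * _gcry_sha384_hash_buffer shortcut added further below.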
*/ static void sha512_final (void *context) { SHA512_CONTEXT *hd = context; unsigned int stack_burn_depth; u64 t, th, msb, lsb; byte *p; _gcry_md_block_write (context, NULL, 0); /* flush */ ; t = hd->bctx.nblocks; /* if (sizeof t == sizeof hd->bctx.nblocks) */ th = hd->bctx.nblocks_high; /* else */ /* th = hd->bctx.nblocks >> 64; In case we ever use u128 */ /* multiply by 128 to make a byte count */ lsb = t << 7; msb = (th << 7) | (t >> 57); /* add the count */ t = lsb; if ((lsb += hd->bctx.count) < t) msb++; /* multiply by 8 to make a bit count */ t = lsb; lsb <<= 3; msb <<= 3; msb |= t >> 61; if (hd->bctx.count < 112) { /* enough room */ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ while (hd->bctx.count < 112) hd->bctx.buf[hd->bctx.count++] = 0; /* pad */ } else { /* need one extra block */ hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ while (hd->bctx.count < 128) hd->bctx.buf[hd->bctx.count++] = 0; _gcry_md_block_write (context, NULL, 0); /* flush */ ; memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */ } /* append the 128 bit count */ buf_put_be64(hd->bctx.buf + 112, msb); buf_put_be64(hd->bctx.buf + 120, lsb); stack_burn_depth = transform (hd, hd->bctx.buf, 1); _gcry_burn_stack (stack_burn_depth); p = hd->bctx.buf; #define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0) X (0); X (1); X (2); X (3); X (4); X (5); /* Note that these last two chunks are included even for SHA384. We just ignore them. */ X (6); X (7); #undef X } static byte * sha512_read (void *context) { SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context; return hd->bctx.buf; } /* Shortcut functions which puts the hash value of the supplied buffer * into outbuf which must have a size of 64 bytes. */ void _gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length) { SHA512_CONTEXT hd; sha512_init (&hd, 0); _gcry_md_block_write (&hd, buffer, length); sha512_final (&hd); memcpy (outbuf, hd.bctx.buf, 64); } /* Variant of the above shortcut function using multiple buffers. */ void _gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) { SHA512_CONTEXT hd; sha512_init (&hd, 0); for (;iovcnt > 0; iov++, iovcnt--) _gcry_md_block_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len); sha512_final (&hd); memcpy (outbuf, hd.bctx.buf, 64); } + +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 48 bytes. */ +static void +_gcry_sha384_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + SHA512_CONTEXT hd; + + sha384_init (&hd, 0); + _gcry_md_block_write (&hd, buffer, length); + sha512_final (&hd); + memcpy (outbuf, hd.bctx.buf, 48); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +static void +_gcry_sha384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt) +{ + SHA512_CONTEXT hd; + + sha384_init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + _gcry_md_block_write (&hd, + (const char*)iov[0].data + iov[0].off, iov[0].len); + sha512_final (&hd); + memcpy (outbuf, hd.bctx.buf, 48); +} + + /* Self-test section. 
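   A minimal sketch of the sha384 shortcuts added above; "digest" and
   "iov" are hypothetical locals, not part of this patch:

       byte digest[48];
       gcry_buffer_t iov = { .size = 0, .off = 0, .len = 3, .data = "abc" };
       _gcry_sha384_hash_buffers (digest, &iov, 1);

   This produces the same FIPS 180-2 "abc" vector that is listed at the
   top of this file and checked by selftests_sha384 below.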
*/ static gpg_err_code_t selftests_sha384 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "short string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA384, 0, "abc", 3, "\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07" "\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed" "\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA384, 0, "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, "\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47" "\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12" "\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39", 48); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA384, 1, NULL, 0, "\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C" "\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B" "\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85", 48); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", GCRY_MD_SHA384, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } static gpg_err_code_t selftests_sha512 (int extended, selftest_report_func_t report) { const char *what; const char *errtxt; what = "short string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA512, 0, "abc", 3, "\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31" "\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A" "\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD" "\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA512, 0, "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, "\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F" "\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18" "\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A" "\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09", 64); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (GCRY_MD_SHA512, 1, NULL, 0, "\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63" "\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB" "\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B" "\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B", 64); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", GCRY_MD_SHA512, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. 
*/ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_MD_SHA384: ec = selftests_sha384 (extended, report); break; case GCRY_MD_SHA512: ec = selftests_sha512 (extended, report); break; default: ec = GPG_ERR_DIGEST_ALGO; break; } return ec; } static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */ { 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05, 0x00, 0x04, 0x40 }; static gcry_md_oid_spec_t oid_spec_sha512[] = { { "2.16.840.1.101.3.4.2.3" }, /* PKCS#1 sha512WithRSAEncryption */ { "1.2.840.113549.1.1.13" }, { NULL } }; gcry_md_spec_t _gcry_digest_spec_sha512 = { GCRY_MD_SHA512, {0, 1}, "SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64, sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, _gcry_sha512_hash_buffer, _gcry_sha512_hash_buffers, sizeof (SHA512_CONTEXT), run_selftests }; static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */ { 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05, 0x00, 0x04, 0x30 }; static gcry_md_oid_spec_t oid_spec_sha384[] = { { "2.16.840.1.101.3.4.2.2" }, /* PKCS#1 sha384WithRSAEncryption */ { "1.2.840.113549.1.1.12" }, /* SHA384WithECDSA: RFC 7427 (A.3.3.) */ { "1.2.840.10045.4.3.3" }, { NULL }, }; gcry_md_spec_t _gcry_digest_spec_sha384 = { GCRY_MD_SHA384, {0, 1}, "SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48, sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, - NULL, NULL, + _gcry_sha384_hash_buffer, _gcry_sha384_hash_buffers, sizeof (SHA512_CONTEXT), run_selftests };
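Note on the spec changes above: filling in the previously NULL hash_buffer
and hash_buffers slots of the SHA3-*, SHA224 and SHA384 specs lets the
one-shot digest entry points dispatch to the direct shortcut functions
instead of going through the generic init/write/final path. A minimal
caller-side sketch of that one-shot use, assuming libgcrypt has already
been initialized ("digest" is a hypothetical local buffer):

    #include <gcrypt.h>

    unsigned char digest[28];

    /* One-shot SHA-224; with this patch the call can be served by the
       new _gcry_sha224_hash_buffer shortcut. */
    gcry_md_hash_buffer (GCRY_MD_SHA224, digest, "abc", 3);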