diff --git a/cipher/sha1.c b/cipher/sha1.c
index e50262ff..76c486c7 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -1,671 +1,664 @@
/* sha1.c - SHA1 hash function
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
/* Test vectors:
*
* "abc"
* A999 3E36 4706 816A BA3E 2571 7850 C26C 9CD0 D89D
*
* "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
* 8498 3E44 1C3B D26E BAAE 4AA1 F951 29E5 E546 70F1
*/
#include
#include
#include
#include
#ifdef HAVE_STDINT_H
# include
#endif
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "sha1.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
#undef USE_BMI2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_BMI2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_NEON indicates whether to enable ARM NEON assembly code. */
#undef USE_NEON
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_NEON 1
# endif
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
/* A macro to test whether P is properly aligned for an u32 type.
Note that config.h provides a suitable replacement for uintptr_t if
it does not exist in stdint.h. */
/* #if __GNUC__ >= 2 */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % __alignof__ (u32))) */
/* #else */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */
/* #endif */
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_SSSE3
+unsigned int
+_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+ size_t nblks);
+
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_sha1_transform_intel_shaext (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int
+_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *c, const unsigned char *data, size_t nblks);
static void
sha1_init (void *context, unsigned int flags)
{
SHA1_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x67452301;
hd->h1 = 0xefcdab89;
hd->h2 = 0x98badcfe;
hd->h3 = 0x10325476;
hd->h4 = 0xc3d2e1f0;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha1_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx;
#endif
#ifdef USE_BMI2
- hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx_bmi2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha1_transform_intel_shaext;
#endif
#ifdef USE_NEON
- hd->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv7_neon;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA1) != 0;
+ if ((features & HWF_ARM_SHA1) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv8_ce;
#endif
+
(void)features;
}
/*
* Initialize the context HD. This is used to prepare the use of
* _gcry_sha1_mixblock. WARNING: This is a special purpose function
* for exclusive use by random-csprng.c.
*/
void
_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd)
{
sha1_init (hd, 0);
}
/* Round function macros. */
#define K1 0x5A827999L
#define K2 0x6ED9EBA1L
#define K3 0x8F1BBCDCL
#define K4 0xCA62C1D6L
#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) )
#define F2(x,y,z) ( x ^ y ^ z )
#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
#define F4(x,y,z) ( x ^ y ^ z )
#define M(i) ( tm = x[ i &0x0f] \
^ x[(i-14)&0x0f] \
^ x[(i-8) &0x0f] \
^ x[(i-3) &0x0f], \
(x[i&0x0f] = rol(tm, 1)))
#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \
+ f( b, c, d ) \
+ k \
+ m; \
b = rol( b, 30 ); \
} while(0)
-
-#ifdef USE_NEON
-unsigned int
-_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int
-_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
/*
* Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
*/
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA1_CONTEXT *hd = ctx;
- const u32 *idata = (const void *)data;
- register u32 a, b, c, d, e; /* Local copies of the chaining variables. */
- register u32 tm; /* Helper. */
- u32 x[16]; /* The array we work on. */
+
+ do
+ {
+ const u32 *idata = (const void *)data;
+ u32 a, b, c, d, e; /* Local copies of the chaining variables. */
+ u32 tm; /* Helper. */
+ u32 x[16]; /* The array we work on. */
#define I(i) (x[i] = buf_get_be32(idata + i))
/* Get the values of the chaining variables. */
a = hd->h0;
b = hd->h1;
c = hd->h2;
d = hd->h3;
e = hd->h4;
/* Transform. */
R( a, b, c, d, e, F1, K1, I( 0) );
R( e, a, b, c, d, F1, K1, I( 1) );
R( d, e, a, b, c, F1, K1, I( 2) );
R( c, d, e, a, b, F1, K1, I( 3) );
R( b, c, d, e, a, F1, K1, I( 4) );
R( a, b, c, d, e, F1, K1, I( 5) );
R( e, a, b, c, d, F1, K1, I( 6) );
R( d, e, a, b, c, F1, K1, I( 7) );
R( c, d, e, a, b, F1, K1, I( 8) );
R( b, c, d, e, a, F1, K1, I( 9) );
R( a, b, c, d, e, F1, K1, I(10) );
R( e, a, b, c, d, F1, K1, I(11) );
R( d, e, a, b, c, F1, K1, I(12) );
R( c, d, e, a, b, F1, K1, I(13) );
R( b, c, d, e, a, F1, K1, I(14) );
R( a, b, c, d, e, F1, K1, I(15) );
R( e, a, b, c, d, F1, K1, M(16) );
R( d, e, a, b, c, F1, K1, M(17) );
R( c, d, e, a, b, F1, K1, M(18) );
R( b, c, d, e, a, F1, K1, M(19) );
R( a, b, c, d, e, F2, K2, M(20) );
R( e, a, b, c, d, F2, K2, M(21) );
R( d, e, a, b, c, F2, K2, M(22) );
R( c, d, e, a, b, F2, K2, M(23) );
R( b, c, d, e, a, F2, K2, M(24) );
R( a, b, c, d, e, F2, K2, M(25) );
R( e, a, b, c, d, F2, K2, M(26) );
R( d, e, a, b, c, F2, K2, M(27) );
R( c, d, e, a, b, F2, K2, M(28) );
R( b, c, d, e, a, F2, K2, M(29) );
R( a, b, c, d, e, F2, K2, M(30) );
R( e, a, b, c, d, F2, K2, M(31) );
R( d, e, a, b, c, F2, K2, M(32) );
R( c, d, e, a, b, F2, K2, M(33) );
R( b, c, d, e, a, F2, K2, M(34) );
R( a, b, c, d, e, F2, K2, M(35) );
R( e, a, b, c, d, F2, K2, M(36) );
R( d, e, a, b, c, F2, K2, M(37) );
R( c, d, e, a, b, F2, K2, M(38) );
R( b, c, d, e, a, F2, K2, M(39) );
R( a, b, c, d, e, F3, K3, M(40) );
R( e, a, b, c, d, F3, K3, M(41) );
R( d, e, a, b, c, F3, K3, M(42) );
R( c, d, e, a, b, F3, K3, M(43) );
R( b, c, d, e, a, F3, K3, M(44) );
R( a, b, c, d, e, F3, K3, M(45) );
R( e, a, b, c, d, F3, K3, M(46) );
R( d, e, a, b, c, F3, K3, M(47) );
R( c, d, e, a, b, F3, K3, M(48) );
R( b, c, d, e, a, F3, K3, M(49) );
R( a, b, c, d, e, F3, K3, M(50) );
R( e, a, b, c, d, F3, K3, M(51) );
R( d, e, a, b, c, F3, K3, M(52) );
R( c, d, e, a, b, F3, K3, M(53) );
R( b, c, d, e, a, F3, K3, M(54) );
R( a, b, c, d, e, F3, K3, M(55) );
R( e, a, b, c, d, F3, K3, M(56) );
R( d, e, a, b, c, F3, K3, M(57) );
R( c, d, e, a, b, F3, K3, M(58) );
R( b, c, d, e, a, F3, K3, M(59) );
R( a, b, c, d, e, F4, K4, M(60) );
R( e, a, b, c, d, F4, K4, M(61) );
R( d, e, a, b, c, F4, K4, M(62) );
R( c, d, e, a, b, F4, K4, M(63) );
R( b, c, d, e, a, F4, K4, M(64) );
R( a, b, c, d, e, F4, K4, M(65) );
R( e, a, b, c, d, F4, K4, M(66) );
R( d, e, a, b, c, F4, K4, M(67) );
R( c, d, e, a, b, F4, K4, M(68) );
R( b, c, d, e, a, F4, K4, M(69) );
R( a, b, c, d, e, F4, K4, M(70) );
R( e, a, b, c, d, F4, K4, M(71) );
R( d, e, a, b, c, F4, K4, M(72) );
R( c, d, e, a, b, F4, K4, M(73) );
R( b, c, d, e, a, F4, K4, M(74) );
R( a, b, c, d, e, F4, K4, M(75) );
R( e, a, b, c, d, F4, K4, M(76) );
R( d, e, a, b, c, F4, K4, M(77) );
R( c, d, e, a, b, F4, K4, M(78) );
R( b, c, d, e, a, F4, K4, M(79) );
/* Update the chaining variables. */
hd->h0 += a;
hd->h1 += b;
hd->h2 += c;
hd->h3 += d;
hd->h4 += e;
- return /* burn_stack */ 88+4*sizeof(void*);
-}
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int
-_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int
-_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_BMI2
-unsigned int
-_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA1_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_BMI2
- if (hd->use_bmi2)
- {
- burn = _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
- {
- burn = _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-#ifdef USE_NEON
- if (hd->use_neon)
- {
- burn = _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 88+4*sizeof(void*);
}
/*
* Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE
* which must have a length 64 bytes. BLOCKOF64BYTE must be 32-bit
* aligned. Updates the 20 bytes in BLOCKOF64BYTE with its mixed
* content. Returns the number of bytes which should be burned on the
* stack. You need to use _gcry_sha1_mixblock_init to initialize the
* context.
* WARNING: This is a special purpose function for exclusive use by
* random-csprng.c.
*/
unsigned int
_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte)
{
u32 *p = blockof64byte;
unsigned int nburn;
- nburn = transform (hd, blockof64byte, 1);
+ nburn = (*hd->bctx.bwrite) (hd, blockof64byte, 1);
p[0] = hd->h0;
p[1] = hd->h1;
p[2] = hd->h2;
p[3] = hd->h3;
p[4] = hd->h4;
return nburn;
}
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
* handle will the destroy the returned buffer.
* Returns: 20 bytes representing the digest.
*/
static void
sha1_final(void *context)
{
SHA1_CONTEXT *hd = context;
u32 t, th, msb, lsb;
unsigned char *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if( (lsb += hd->bctx.count) < t )
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
if( hd->bctx.count < 56 ) /* enough room */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while( hd->bctx.count < 56 )
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else /* need one extra block */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while( hd->bctx.count < 64 )
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write(hd, NULL, 0); /* flush */;
memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform( hd, hd->bctx.buf, 1 );
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
#undef X
}
static unsigned char *
sha1_read( void *context )
{
SHA1_CONTEXT *hd = context;
return hd->bctx.buf;
}
/****************
* Shortcut functions which puts the hash value of the supplied buffer
* into outbuf which must have a size of 20 bytes.
*/
void
_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
/* Variant of the above shortcut function using a multiple buffers. */
void
_gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha1 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abc", 3,
"\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E"
"\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D", 20);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE"
"\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1", 20);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 1,
NULL, 0,
"\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E"
"\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F", 20);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA1, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA1:
ec = selftests_sha1 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
{ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 };
static gcry_md_oid_spec_t oid_spec_sha1[] =
{
/* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */
{ "1.2.840.113549.1.1.5" },
/* iso.member-body.us.x9-57.x9cm.3 (dsaWithSha1)*/
{ "1.2.840.10040.4.3" },
/* from NIST's OIW (sha1) */
{ "1.3.14.3.2.26" },
/* from NIST OIW (sha-1WithRSAEncryption) */
{ "1.3.14.3.2.29" },
/* iso.member-body.us.ansi-x9-62.signatures.ecdsa-with-sha1 */
{ "1.2.840.10045.4.1" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha1 =
{
GCRY_MD_SHA1, {0, 1},
"SHA1", asn, DIM (asn), oid_spec_sha1, 20,
sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL,
_gcry_sha1_hash_buffer, _gcry_sha1_hash_buffers,
sizeof (SHA1_CONTEXT),
run_selftests
};
diff --git a/cipher/sha1.h b/cipher/sha1.h
index 93ce79b5..acf764ba 100644
--- a/cipher/sha1.h
+++ b/cipher/sha1.h
@@ -1,41 +1,35 @@
/* sha1.h - SHA-1 context definition
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
#ifndef GCRY_SHA1_H
#define GCRY_SHA1_H
#include "hash-common.h"
/* We need this here for direct use by random-csprng.c. */
typedef struct
{
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4;
- unsigned int use_ssse3:1;
- unsigned int use_avx:1;
- unsigned int use_bmi2:1;
- unsigned int use_shaext:1;
- unsigned int use_neon:1;
- unsigned int use_arm_ce:1;
} SHA1_CONTEXT;
void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd);
unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte);
#endif /*GCRY_SHA1_H*/
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 06959707..e82a9d90 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -1,788 +1,769 @@
/* sha256.c - SHA256 hash function
* Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
/* Test vectors:
"abc"
SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7
SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525
SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1
"a" one million times
SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67
SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0
*/
#include
#include
#include
#include
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
typedef struct {
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4,h5,h6,h7;
+} SHA256_CONTEXT;
+
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_SHAEXT
- unsigned int use_shaext:1;
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha256_transform_intel_shaext(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
+}
#endif
+
#ifdef USE_ARM_CE
- unsigned int use_arm_ce:1;
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+ const void *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+}
#endif
-} SHA256_CONTEXT;
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
static void
sha256_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x6a09e667;
hd->h1 = 0xbb67ae85;
hd->h2 = 0x3c6ef372;
hd->h3 = 0xa54ff53a;
hd->h4 = 0x510e527f;
hd->h5 = 0x9b05688c;
hd->h6 = 0x1f83d9ab;
hd->h7 = 0x5be0cd19;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
static void
sha224_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0xc1059ed8;
hd->h1 = 0x367cd507;
hd->h2 = 0x3070dd17;
hd->h3 = 0xf70e5939;
hd->h4 = 0xffc00b31;
hd->h5 = 0x68581511;
hd->h6 = 0x64f98fa7;
hd->h7 = 0xbefa4fa4;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
/*
Transform the message X which consists of 16 32-bit-words. See FIPS
180-2 for details. */
#define R(a,b,c,d,e,f,g,h,k,w) do \
{ \
t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \
t2 = Sum0((a)) + Maj((a),(b),(c)); \
d += t1; \
h = t1 + t2; \
} while (0)
/* (4.2) same as SHA-1's F1. */
#define Cho(x, y, z) (z ^ (x & (y ^ z)))
/* (4.3) same as SHA-1's F3 */
#define Maj(x, y, z) ((x & y) + (z & (x ^ y)))
/* (4.4) */
#define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22))
/* (4.5) */
#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
/* Message expansion */
#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */
#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */
#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
#define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \
+ w[(i-7) &0x0f] \
+ S0(w[(i-15)&0x0f]) \
+ w[(i-16)&0x0f] )
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA256_CONTEXT *hd = ctx;
static const u32 K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
- u32 a,b,c,d,e,f,g,h,t1,t2;
- u32 w[16];
-
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- R(a, b, c, d, e, f, g, h, K[0], I(0));
- R(h, a, b, c, d, e, f, g, K[1], I(1));
- R(g, h, a, b, c, d, e, f, K[2], I(2));
- R(f, g, h, a, b, c, d, e, K[3], I(3));
- R(e, f, g, h, a, b, c, d, K[4], I(4));
- R(d, e, f, g, h, a, b, c, K[5], I(5));
- R(c, d, e, f, g, h, a, b, K[6], I(6));
- R(b, c, d, e, f, g, h, a, K[7], I(7));
- R(a, b, c, d, e, f, g, h, K[8], I(8));
- R(h, a, b, c, d, e, f, g, K[9], I(9));
- R(g, h, a, b, c, d, e, f, K[10], I(10));
- R(f, g, h, a, b, c, d, e, K[11], I(11));
- R(e, f, g, h, a, b, c, d, K[12], I(12));
- R(d, e, f, g, h, a, b, c, K[13], I(13));
- R(c, d, e, f, g, h, a, b, K[14], I(14));
- R(b, c, d, e, f, g, h, a, K[15], I(15));
-
- R(a, b, c, d, e, f, g, h, K[16], W(16));
- R(h, a, b, c, d, e, f, g, K[17], W(17));
- R(g, h, a, b, c, d, e, f, K[18], W(18));
- R(f, g, h, a, b, c, d, e, K[19], W(19));
- R(e, f, g, h, a, b, c, d, K[20], W(20));
- R(d, e, f, g, h, a, b, c, K[21], W(21));
- R(c, d, e, f, g, h, a, b, K[22], W(22));
- R(b, c, d, e, f, g, h, a, K[23], W(23));
- R(a, b, c, d, e, f, g, h, K[24], W(24));
- R(h, a, b, c, d, e, f, g, K[25], W(25));
- R(g, h, a, b, c, d, e, f, K[26], W(26));
- R(f, g, h, a, b, c, d, e, K[27], W(27));
- R(e, f, g, h, a, b, c, d, K[28], W(28));
- R(d, e, f, g, h, a, b, c, K[29], W(29));
- R(c, d, e, f, g, h, a, b, K[30], W(30));
- R(b, c, d, e, f, g, h, a, K[31], W(31));
-
- R(a, b, c, d, e, f, g, h, K[32], W(32));
- R(h, a, b, c, d, e, f, g, K[33], W(33));
- R(g, h, a, b, c, d, e, f, K[34], W(34));
- R(f, g, h, a, b, c, d, e, K[35], W(35));
- R(e, f, g, h, a, b, c, d, K[36], W(36));
- R(d, e, f, g, h, a, b, c, K[37], W(37));
- R(c, d, e, f, g, h, a, b, K[38], W(38));
- R(b, c, d, e, f, g, h, a, K[39], W(39));
- R(a, b, c, d, e, f, g, h, K[40], W(40));
- R(h, a, b, c, d, e, f, g, K[41], W(41));
- R(g, h, a, b, c, d, e, f, K[42], W(42));
- R(f, g, h, a, b, c, d, e, K[43], W(43));
- R(e, f, g, h, a, b, c, d, K[44], W(44));
- R(d, e, f, g, h, a, b, c, K[45], W(45));
- R(c, d, e, f, g, h, a, b, K[46], W(46));
- R(b, c, d, e, f, g, h, a, K[47], W(47));
-
- R(a, b, c, d, e, f, g, h, K[48], W(48));
- R(h, a, b, c, d, e, f, g, K[49], W(49));
- R(g, h, a, b, c, d, e, f, K[50], W(50));
- R(f, g, h, a, b, c, d, e, K[51], W(51));
- R(e, f, g, h, a, b, c, d, K[52], W(52));
- R(d, e, f, g, h, a, b, c, K[53], W(53));
- R(c, d, e, f, g, h, a, b, K[54], W(54));
- R(b, c, d, e, f, g, h, a, K[55], W(55));
- R(a, b, c, d, e, f, g, h, K[56], W(56));
- R(h, a, b, c, d, e, f, g, K[57], W(57));
- R(g, h, a, b, c, d, e, f, K[58], W(58));
- R(f, g, h, a, b, c, d, e, K[59], W(59));
- R(e, f, g, h, a, b, c, d, K[60], W(60));
- R(d, e, f, g, h, a, b, c, K[61], W(61));
- R(c, d, e, f, g, h, a, b, K[62], W(62));
- R(b, c, d, e, f, g, h, a, K[63], W(63));
-
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /*burn_stack*/ 26*4+32;
-}
-#undef S0
-#undef S1
-#undef R
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha256_transform_intel_shaext(u32 state[8],
- const unsigned char *input_data,
- size_t num_blks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
- const void *input_data,
- size_t num_blks);
-#endif
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA256_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX2
- if (hd->use_avx2)
- {
- burn = _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
+ do
{
- burn = _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
+ u32 a,b,c,d,e,f,g,h,t1,t2;
+ u32 w[16];
+
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I(0));
+ R(h, a, b, c, d, e, f, g, K[1], I(1));
+ R(g, h, a, b, c, d, e, f, K[2], I(2));
+ R(f, g, h, a, b, c, d, e, K[3], I(3));
+ R(e, f, g, h, a, b, c, d, K[4], I(4));
+ R(d, e, f, g, h, a, b, c, K[5], I(5));
+ R(c, d, e, f, g, h, a, b, K[6], I(6));
+ R(b, c, d, e, f, g, h, a, K[7], I(7));
+ R(a, b, c, d, e, f, g, h, K[8], I(8));
+ R(h, a, b, c, d, e, f, g, K[9], I(9));
+ R(g, h, a, b, c, d, e, f, K[10], I(10));
+ R(f, g, h, a, b, c, d, e, K[11], I(11));
+ R(e, f, g, h, a, b, c, d, K[12], I(12));
+ R(d, e, f, g, h, a, b, c, K[13], I(13));
+ R(c, d, e, f, g, h, a, b, K[14], I(14));
+ R(b, c, d, e, f, g, h, a, K[15], I(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 26*4 + 32 + 3 * sizeof(void*);
}
+#undef S0
+#undef S1
+#undef R
+
/*
The routine finally terminates the computation and returns the
digest. The handle is prepared for a new cycle, but adding bytes
to the handle will the destroy the returned buffer. Returns: 32
bytes with the message the digest. */
static void
sha256_final(void *context)
{
SHA256_CONTEXT *hd = context;
u32 t, th, msb, lsb;
byte *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
if (hd->bctx.count < 56)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 56)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 64)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
memset (hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform (hd, hd->bctx.buf, 1);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
X(5);
X(6);
X(7);
#undef X
}
static byte *
sha256_read (void *context)
{
SHA256_CONTEXT *hd = context;
return hd->bctx.buf;
}
/* Shortcut functions which puts the hash value of the supplied buffer
* into outbuf which must have a size of 32 bytes. */
void
_gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Shortcut functions which puts the hash value of the supplied buffer
* into outbuf which must have a size of 28 bytes. */
static void
_gcry_sha224_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha224 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abc", 3,
"\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3"
"\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50"
"\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 1,
NULL, 0,
"\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b"
"\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA224, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha256 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abc", 3,
"\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23"
"\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39"
"\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1",
32);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 1,
NULL, 0,
"\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67"
"\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0",
32);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA256, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA224:
ec = selftests_sha224 (extended, report);
break;
case GCRY_MD_SHA256:
ec = selftests_sha256 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
{ 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48,
0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04,
0x1C
};
static gcry_md_oid_spec_t oid_spec_sha224[] =
{
/* From RFC3874, Section 4 */
{ "2.16.840.1.101.3.4.2.4" },
{ NULL },
};
static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */
{ 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
0x00, 0x04, 0x20 };
static gcry_md_oid_spec_t oid_spec_sha256[] =
{
/* According to the OpenPGP draft rfc2440-bis06 */
{ "2.16.840.1.101.3.4.2.1" },
/* PKCS#1 sha256WithRSAEncryption */
{ "1.2.840.113549.1.1.11" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha224 =
{
GCRY_MD_SHA224, {0, 1},
"SHA224", asn224, DIM (asn224), oid_spec_sha224, 28,
sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha224_hash_buffer, _gcry_sha224_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha256 =
{
GCRY_MD_SHA256, {0, 1},
"SHA256", asn256, DIM (asn256), oid_spec_sha256, 32,
sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha256_hash_buffer, _gcry_sha256_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S
index a9d12724..6596f2cd 100644
--- a/cipher/sha512-armv7-neon.S
+++ b/cipher/sha512-armv7-neon.S
@@ -1,449 +1,450 @@
/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
*
* Copyright (C) 2013 Jussi Kivilinna
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
#include
#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_NEON)
.text
.syntax unified
.fpu neon
.arm
/* structure of SHA512_CONTEXT */
#define hd_a 0
#define hd_b ((hd_a) + 8)
#define hd_c ((hd_b) + 8)
#define hd_d ((hd_c) + 8)
#define hd_e ((hd_d) + 8)
#define hd_f ((hd_e) + 8)
#define hd_g ((hd_f) + 8)
/* register macros */
#define RK %r2
#define RA d0
#define RB d1
#define RC d2
#define RD d3
#define RE d4
#define RF d5
#define RG d6
#define RH d7
#define RT0 d8
#define RT1 d9
#define RT2 d10
#define RT3 d11
#define RT4 d12
#define RT5 d13
#define RT6 d14
#define RT7 d15
#define RT01q q4
#define RT23q q5
#define RT45q q6
#define RT67q q7
#define RW0 d16
#define RW1 d17
#define RW2 d18
#define RW3 d19
#define RW4 d20
#define RW5 d21
#define RW6 d22
#define RW7 d23
#define RW8 d24
#define RW9 d25
#define RW10 d26
#define RW11 d27
#define RW12 d28
#define RW13 d29
#define RW14 d30
#define RW15 d31
#define RW01q q8
#define RW23q q9
#define RW45q q10
#define RW67q q11
#define RW89q q12
#define RW1011q q13
#define RW1213q q14
#define RW1415q q15
/***********************************************************************
* ARM assembly implementation of sha512 transform
***********************************************************************/
#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3; \
\
/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
\
/**** S0(w[1:2]) */ \
\
/* w[0:1] += w[9:10] */ \
/* RT23q = rw1:rw2 */ \
vext.u64 RT23q, rw01q, rw23q, #1; \
vadd.u64 rw0, rw9; \
vadd.u64 rg, rg, RT0; \
vadd.u64 rw1, rw10;\
vadd.u64 rg, rg, RT1; /* g+=t1; */ \
\
vshr.u64 RT45q, RT23q, #1; \
vshl.u64 RT67q, RT23q, #64 - 1; \
vshr.u64 RT01q, RT23q, #8; \
veor.u64 RT45q, RT45q, RT67q; \
vshl.u64 RT67q, RT23q, #64 - 8; \
veor.u64 RT45q, RT45q, RT01q; \
vshr.u64 RT01q, RT23q, #7; \
veor.u64 RT45q, RT45q, RT67q; \
\
/**** S1(w[14:15]) */ \
vshr.u64 RT23q, rw1415q, #6; \
veor.u64 RT01q, RT01q, RT45q; \
vshr.u64 RT45q, rw1415q, #19; \
vshl.u64 RT67q, rw1415q, #64 - 19; \
veor.u64 RT23q, RT23q, RT45q; \
vshr.u64 RT45q, rw1415q, #61; \
veor.u64 RT23q, RT23q, RT67q; \
vshl.u64 RT67q, rw1415q, #64 - 61; \
veor.u64 RT23q, RT23q, RT45q; \
vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
veor.u64 RT01q, RT23q, RT67q;
#define vadd_RT01q(rw01q) \
/* w[0:1] += S(w[14:15]) */ \
vadd.u64 rw01q, RT01q;
#define dummy(_) /*_*/
#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, interleave_op1, arg1, interleave_op2, arg2) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op1(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
interleave_op2(arg2); \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3;
#define vadd_rg_RT0(rg) \
vadd.u64 rg, rg, RT0;
#define vadd_rg_RT1(rg) \
vadd.u64 rg, rg, RT1; /* g+=t1; */
.align 3
.globl _gcry_sha512_transform_armv7_neon
.type _gcry_sha512_transform_armv7_neon,%function;
_gcry_sha512_transform_armv7_neon:
/* Input:
* %r0: SHA512_CONTEXT
* %r1: data
* %r2: u64 k[] constants
* %r3: nblks
*/
push {%lr};
mov %lr, #0;
/* Load context to d0-d7 */
vld1.64 {RA-RD}, [%r0]!;
vld1.64 {RE-RH}, [%r0];
sub %r0, #(4*8);
/* Load input to w[16], d16-d31 */
/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
vld1.64 {RW0-RW3}, [%r1]!;
vld1.64 {RW4-RW7}, [%r1]!;
vld1.64 {RW8-RW11}, [%r1]!;
vld1.64 {RW12-RW15}, [%r1]!;
#ifdef __ARMEL__
/* byteswap */
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
/* EABI says that d8-d15 must be preserved by callee. */
vpush {RT0-RT7};
.Loop:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, dummy, _);
b .Lenter_rounds;
.Loop_rounds:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
.Lenter_rounds:
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
add %lr, #16;
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
cmp %lr, #64;
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
bne .Loop_rounds;
subs %r3, #1;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
beq .Lhandle_tail;
vld1.64 {RW0-RW3}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
#endif
vld1.64 {RW4-RW7}, [%r1]!;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
#ifdef __ARMEL__
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
#endif
vld1.64 {RW8-RW11}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
#endif
vld1.64 {RW12-RW15}, [%r1]!;
vadd_rg_RT0(RA);
vadd_rg_RT1(RA);
/* Load context */
vld1.64 {RT0-RT3}, [%r0]!;
vld1.64 {RT4-RT7}, [%r0];
sub %r0, #(4*8);
#ifdef __ARMEL__
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
vadd.u64 RA, RT0;
vadd.u64 RB, RT1;
vadd.u64 RC, RT2;
vadd.u64 RD, RT3;
vadd.u64 RE, RT4;
vadd.u64 RF, RT5;
vadd.u64 RG, RT6;
vadd.u64 RH, RT7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
sub RK, $(8*80);
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
mov %lr, #0;
sub %r0, #(4*8);
b .Loop;
.ltorg
.Lhandle_tail:
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
/* Load context to d16-d23 */
vld1.64 {RW0-RW3}, [%r0]!;
vadd_rg_RT0(RA);
vld1.64 {RW4-RW7}, [%r0];
vadd_rg_RT1(RA);
sub %r0, #(4*8);
vadd.u64 RA, RW0;
vadd.u64 RB, RW1;
vadd.u64 RC, RW2;
vadd.u64 RD, RW3;
vadd.u64 RE, RW4;
vadd.u64 RF, RW5;
vadd.u64 RG, RW6;
vadd.u64 RH, RW7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
/* Clear used registers */
/* d16-d31 */
veor.u64 RW01q, RW01q;
veor.u64 RW23q, RW23q;
veor.u64 RW45q, RW45q;
veor.u64 RW67q, RW67q;
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
veor.u64 RW89q, RW89q;
veor.u64 RW1011q, RW1011q;
veor.u64 RW1213q, RW1213q;
veor.u64 RW1415q, RW1415q;
/* d8-d15 */
vpop {RT0-RT7};
/* d0-d7 (q0-q3) */
veor.u64 %q0, %q0;
veor.u64 %q1, %q1;
veor.u64 %q2, %q2;
veor.u64 %q3, %q3;
+ eor %r0, %r0;
pop {%pc};
.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon;
#endif
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 9405de80..721f3405 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -1,991 +1,951 @@
/* sha512.c - SHA384 and SHA512 hash functions
* Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser general Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see .
*/
/* Test vectors from FIPS-180-2:
*
* "abc"
* 384:
* CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163
* 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7
* 512:
* DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A
* 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F
*
* "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
* 384:
* 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2
* 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039
* 512:
* 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018
* 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909
*
* "a" x 1000000
* 384:
* 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852
* 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985
* 512:
* E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB
* DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B
*/
#include
#include
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
#undef USE_ARM_NEON_ASM
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_ARM_NEON_ASM 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/
/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
#undef USE_ARM_ASM
#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
# define USE_ARM_ASM 1
#endif
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
typedef struct
{
u64 h0, h1, h2, h3, h4, h5, h6, h7;
} SHA512_STATE;
typedef struct
{
gcry_md_block_ctx_t bctx;
SHA512_STATE state;
+} SHA512_CONTEXT;
+
+
+static const u64 k[] =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
+
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
#ifdef USE_ARM_NEON_ASM
- unsigned int use_neon:1;
+unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+
+static unsigned int
+do_sha512_transform_armv7_neon(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
#endif
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_ssse3 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx2 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
-} SHA512_CONTEXT;
+
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks);
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = context;
+ return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
+#else
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks);
+#endif
+
static void
sha512_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
+ (void)k;
hd->h0 = U64_C(0x6a09e667f3bcc908);
hd->h1 = U64_C(0xbb67ae8584caa73b);
hd->h2 = U64_C(0x3c6ef372fe94f82b);
hd->h3 = U64_C(0xa54ff53a5f1d36f1);
hd->h4 = U64_C(0x510e527fade682d1);
hd->h5 = U64_C(0x9b05688c2b3e6c1f);
hd->h6 = U64_C(0x1f83d9abfb41bd6b);
hd->h7 = U64_C(0x5be0cd19137e2179);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
static void
sha384_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
hd->h1 = U64_C(0x629a292a367cd507);
hd->h2 = U64_C(0x9159015a3070dd17);
hd->h3 = U64_C(0x152fecd8f70e5939);
hd->h4 = U64_C(0x67332667ffc00b31);
hd->h5 = U64_C(0x8eb44a8768581511);
hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
hd->h7 = U64_C(0x47b5481dbefa4fa4);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
-static const u64 k[] =
- {
- U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
- U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
- U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
- U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
- U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
- U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
- U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
- U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
- U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
- U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
- U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
- U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
- U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
- U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
- U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
- U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
- U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
- U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
- U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
- U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
- U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
- U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
- U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
- U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
- U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
- U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
- U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
- U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
- U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
- U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
- U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
- U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
- U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
- U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
- U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
- U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
- U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
- U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
- U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
- U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
- };
-
#ifndef USE_ARM_ASM
static inline u64
ROTR (u64 x, u64 n)
{
return ((x >> n) | (x << (64 - n)));
}
static inline u64
Ch (u64 x, u64 y, u64 z)
{
return ((x & y) ^ ( ~x & z));
}
static inline u64
Maj (u64 x, u64 y, u64 z)
{
return ((x & y) ^ (x & z) ^ (y & z));
}
static inline u64
Sum0 (u64 x)
{
return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
}
static inline u64
Sum1 (u64 x)
{
return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
}
/****************
* Transform the message W which consists of 16 64-bit-words
*/
static unsigned int
-transform_blk (SHA512_STATE *hd, const unsigned char *data)
-{
- u64 a, b, c, d, e, f, g, h;
- u64 w[16];
- int t;
-
- /* get values from the chaining vars */
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- for ( t = 0; t < 16; t++ )
- w[t] = buf_get_be64(data + t * 8);
-
-#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
-#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
-
- for (t = 0; t < 80 - 16; )
- {
- u64 t1, t2;
-
- /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
- with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
- initialized to 0,1,2,3...255,0,... and 1000 iterations:
-
- Not unrolled with macros: 440ms
- Unrolled with macros: 350ms
- Unrolled with inline: 330ms
- */
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]);
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- for (; t < 80; )
- {
- u64 t1, t2;
-
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- /* Update chaining vars. */
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) +
- 3 * sizeof(void*);
-}
-#endif /*!USE_ARM_ASM*/
-
-/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_ARM_NEON_ASM
-void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_ARM_ASM
-unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-
-static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks)
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
{
SHA512_CONTEXT *ctx = context;
- unsigned int burn;
-
-#ifdef USE_AVX2
- if (ctx->use_avx2)
- return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_AVX
- if (ctx->use_avx)
- return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_SSSE3
- if (ctx->use_ssse3)
- return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
+ SHA512_STATE *hd = &ctx->state;
-#ifdef USE_ARM_NEON_ASM
- if (ctx->use_neon)
+ do
{
- _gcry_sha512_transform_armv7_neon (&ctx->state, data, k, nblks);
+ u64 a, b, c, d, e, f, g, h;
+ u64 w[16];
+ int t;
+
+ /* get values from the chaining vars */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ for ( t = 0; t < 16; t++ )
+ w[t] = buf_get_be64(data + t * 8);
- /* _gcry_sha512_transform_armv7_neon does not store sensitive data
- * to stack. */
- return /* no burn_stack */ 0;
- }
-#endif
+#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+ for (t = 0; t < 80 - 16; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ for (; t < 80; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ /* Update chaining vars. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
-#ifdef USE_ARM_ASM
- burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
-#else
- do
- {
- burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
data += 128;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-#endif
-
- return burn;
+ return (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
}
+#endif /*!USE_ARM_ASM*/
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
* handle will the destroy the returned buffer.
* Returns: 64 bytes representing the digest. When used for sha384,
* we take the leftmost 48 of those bytes.
*/
static void
sha512_final (void *context)
{
SHA512_CONTEXT *hd = context;
unsigned int stack_burn_depth;
u64 t, th, msb, lsb;
byte *p;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
t = hd->bctx.nblocks;
/* if (sizeof t == sizeof hd->bctx.nblocks) */
th = hd->bctx.nblocks_high;
/* else */
/* th = hd->bctx.nblocks >> 64; In case we ever use u128 */
/* multiply by 128 to make a byte count */
lsb = t << 7;
msb = (th << 7) | (t >> 57);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 61;
if (hd->bctx.count < 112)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 112)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 128)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */
}
/* append the 128 bit count */
buf_put_be64(hd->bctx.buf + 112, msb);
buf_put_be64(hd->bctx.buf + 120, lsb);
- stack_burn_depth = transform (hd, hd->bctx.buf, 1);
+ stack_burn_depth = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (stack_burn_depth);
p = hd->bctx.buf;
#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0)
X (0);
X (1);
X (2);
X (3);
X (4);
X (5);
/* Note that these last two chunks are included even for SHA384.
We just ignore them. */
X (6);
X (7);
#undef X
}
static byte *
sha512_read (void *context)
{
SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context;
return hd->bctx.buf;
}
/* Shortcut functions which puts the hash value of the supplied buffer
* into outbuf which must have a size of 64 bytes. */
void
_gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
/* Shortcut functions which puts the hash value of the supplied buffer
* into outbuf which must have a size of 48 bytes. */
static void
_gcry_sha384_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha384 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abc", 3,
"\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07"
"\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed"
"\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47"
"\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12"
"\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39",
48);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 1,
NULL, 0,
"\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C"
"\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B"
"\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85",
48);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA384, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha512 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abc", 3,
"\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31"
"\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A"
"\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD"
"\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F"
"\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18"
"\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A"
"\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09",
64);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 1,
NULL, 0,
"\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63"
"\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB"
"\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B"
"\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B",
64);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA512, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA384:
ec = selftests_sha384 (extended, report);
break;
case GCRY_MD_SHA512:
ec = selftests_sha512 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */
{
0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05,
0x00, 0x04, 0x40
};
static gcry_md_oid_spec_t oid_spec_sha512[] =
{
{ "2.16.840.1.101.3.4.2.3" },
/* PKCS#1 sha512WithRSAEncryption */
{ "1.2.840.113549.1.1.13" },
{ NULL }
};
gcry_md_spec_t _gcry_digest_spec_sha512 =
{
GCRY_MD_SHA512, {0, 1},
"SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64,
sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha512_hash_buffer, _gcry_sha512_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};
static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */
{
0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05,
0x00, 0x04, 0x30
};
static gcry_md_oid_spec_t oid_spec_sha384[] =
{
{ "2.16.840.1.101.3.4.2.2" },
/* PKCS#1 sha384WithRSAEncryption */
{ "1.2.840.113549.1.1.12" },
/* SHA384WithECDSA: RFC 7427 (A.3.3.) */
{ "1.2.840.10045.4.3.3" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha384 =
{
GCRY_MD_SHA384, {0, 1},
"SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48,
sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha384_hash_buffer, _gcry_sha384_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};