diff --git a/cipher/sha1.c b/cipher/sha1.c
index e50262ff..76c486c7 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -1,671 +1,664 @@
/* sha1.c - SHA1 hash function
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors:
*
* "abc"
* A999 3E36 4706 816A BA3E 2571 7850 C26C 9CD0 D89D
*
* "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
* 8498 3E44 1C3B D26E BAAE 4AA1 F951 29E5 E546 70F1
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STDINT_H
# include <stdint.h>
#endif
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "sha1.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
#undef USE_BMI2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_BMI2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_NEON indicates whether to enable ARM NEON assembly code. */
#undef USE_NEON
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_NEON 1
# endif
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
/* A macro to test whether P is properly aligned for a u32 type.
Note that config.h provides a suitable replacement for uintptr_t if
it does not exist in stdint.h. */
/* #if __GNUC__ >= 2 */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % __alignof__ (u32))) */
/* #else */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */
/* #endif */
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
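/* [Editor's note, not part of the patch; a sketch of why ASM_EXTRA_STACK grew.]
 * The old transform() dispatcher added 4 * sizeof(void*) to the burn value on
 * every call; this patch folds that term into ASM_EXTRA_STACK itself
 * (10 * 16 bytes for the XMM6-XMM15 spill area plus sizeof(void *) * 4), so
 * each per-implementation wrapper below can simply return
 *
 *   asm_burn + ASM_EXTRA_STACK
 *
 * ("asm_burn" here is just an illustrative name for the assembly routine's
 * return value) and _gcry_burn_stack() still wipes the Win64 ABI-conversion
 * frame created by the sysv_abi prologue. */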
+
+
+#ifdef USE_SSSE3
+unsigned int
+_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+ size_t nblks);
+
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_sha1_transform_intel_shaext (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int
+_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *c, const unsigned char *data, size_t nblks);
static void
sha1_init (void *context, unsigned int flags)
{
SHA1_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x67452301;
hd->h1 = 0xefcdab89;
hd->h2 = 0x98badcfe;
hd->h3 = 0x10325476;
hd->h4 = 0xc3d2e1f0;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha1_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx;
#endif
#ifdef USE_BMI2
- hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx_bmi2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha1_transform_intel_shaext;
#endif
#ifdef USE_NEON
- hd->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv7_neon;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA1) != 0;
+ if ((features & HWF_ARM_SHA1) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv8_ce;
#endif
+
(void)features;
}
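/* [Editor's note, not part of the patch; illustrative view of the new
 * dispatch.] The CPU-feature decision is now taken once in sha1_init() and
 * stored in hd->bctx.bwrite; every subsequent block write (presumably via
 * _gcry_md_block_write in hash-common.c, and sha1_final() below) just calls
 * the selected hook,
 *
 *   unsigned int burn = (*hd->bctx.bwrite) (hd, data, nblks);
 *   _gcry_burn_stack (burn);
 *
 * instead of re-checking hd->use_ssse3 / hd->use_avx / ... on every call as
 * the removed transform() did. */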
/*
* Initialize the context HD. This is used to prepare the use of
* _gcry_sha1_mixblock. WARNING: This is a special purpose function
* for exclusive use by random-csprng.c.
*/
void
_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd)
{
sha1_init (hd, 0);
}
/* Round function macros. */
#define K1 0x5A827999L
#define K2 0x6ED9EBA1L
#define K3 0x8F1BBCDCL
#define K4 0xCA62C1D6L
#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) )
#define F2(x,y,z) ( x ^ y ^ z )
#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
#define F4(x,y,z) ( x ^ y ^ z )
#define M(i) ( tm = x[ i &0x0f] \
^ x[(i-14)&0x0f] \
^ x[(i-8) &0x0f] \
^ x[(i-3) &0x0f], \
(x[i&0x0f] = rol(tm, 1)))
#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \
+ f( b, c, d ) \
+ k \
+ m; \
b = rol( b, 30 ); \
} while(0)
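/* [Editor's note, not part of the patch.] How these macros map to the SHA-1
 * specification: x[] is a 16-word circular buffer, so when M(i) runs,
 * x[i & 0x0f] still holds W[i-16]; the macro therefore computes the standard
 * message expansion
 *
 *   W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 *
 * R(a,b,c,d,e,f,k,m) performs one round, e += rol(a,5) + f(b,c,d) + k + m
 * followed by b = rol(b,30); the rotation of the working registers is done by
 * permuting the macro arguments at the call sites below. */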
-
-#ifdef USE_NEON
-unsigned int
-_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int
-_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
/*
* Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
*/
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA1_CONTEXT *hd = ctx;
- const u32 *idata = (const void *)data;
- register u32 a, b, c, d, e; /* Local copies of the chaining variables. */
- register u32 tm; /* Helper. */
- u32 x[16]; /* The array we work on. */
+
+ do
+ {
+ const u32 *idata = (const void *)data;
+ u32 a, b, c, d, e; /* Local copies of the chaining variables. */
+ u32 tm; /* Helper. */
+ u32 x[16]; /* The array we work on. */
#define I(i) (x[i] = buf_get_be32(idata + i))
/* Get the values of the chaining variables. */
a = hd->h0;
b = hd->h1;
c = hd->h2;
d = hd->h3;
e = hd->h4;
/* Transform. */
R( a, b, c, d, e, F1, K1, I( 0) );
R( e, a, b, c, d, F1, K1, I( 1) );
R( d, e, a, b, c, F1, K1, I( 2) );
R( c, d, e, a, b, F1, K1, I( 3) );
R( b, c, d, e, a, F1, K1, I( 4) );
R( a, b, c, d, e, F1, K1, I( 5) );
R( e, a, b, c, d, F1, K1, I( 6) );
R( d, e, a, b, c, F1, K1, I( 7) );
R( c, d, e, a, b, F1, K1, I( 8) );
R( b, c, d, e, a, F1, K1, I( 9) );
R( a, b, c, d, e, F1, K1, I(10) );
R( e, a, b, c, d, F1, K1, I(11) );
R( d, e, a, b, c, F1, K1, I(12) );
R( c, d, e, a, b, F1, K1, I(13) );
R( b, c, d, e, a, F1, K1, I(14) );
R( a, b, c, d, e, F1, K1, I(15) );
R( e, a, b, c, d, F1, K1, M(16) );
R( d, e, a, b, c, F1, K1, M(17) );
R( c, d, e, a, b, F1, K1, M(18) );
R( b, c, d, e, a, F1, K1, M(19) );
R( a, b, c, d, e, F2, K2, M(20) );
R( e, a, b, c, d, F2, K2, M(21) );
R( d, e, a, b, c, F2, K2, M(22) );
R( c, d, e, a, b, F2, K2, M(23) );
R( b, c, d, e, a, F2, K2, M(24) );
R( a, b, c, d, e, F2, K2, M(25) );
R( e, a, b, c, d, F2, K2, M(26) );
R( d, e, a, b, c, F2, K2, M(27) );
R( c, d, e, a, b, F2, K2, M(28) );
R( b, c, d, e, a, F2, K2, M(29) );
R( a, b, c, d, e, F2, K2, M(30) );
R( e, a, b, c, d, F2, K2, M(31) );
R( d, e, a, b, c, F2, K2, M(32) );
R( c, d, e, a, b, F2, K2, M(33) );
R( b, c, d, e, a, F2, K2, M(34) );
R( a, b, c, d, e, F2, K2, M(35) );
R( e, a, b, c, d, F2, K2, M(36) );
R( d, e, a, b, c, F2, K2, M(37) );
R( c, d, e, a, b, F2, K2, M(38) );
R( b, c, d, e, a, F2, K2, M(39) );
R( a, b, c, d, e, F3, K3, M(40) );
R( e, a, b, c, d, F3, K3, M(41) );
R( d, e, a, b, c, F3, K3, M(42) );
R( c, d, e, a, b, F3, K3, M(43) );
R( b, c, d, e, a, F3, K3, M(44) );
R( a, b, c, d, e, F3, K3, M(45) );
R( e, a, b, c, d, F3, K3, M(46) );
R( d, e, a, b, c, F3, K3, M(47) );
R( c, d, e, a, b, F3, K3, M(48) );
R( b, c, d, e, a, F3, K3, M(49) );
R( a, b, c, d, e, F3, K3, M(50) );
R( e, a, b, c, d, F3, K3, M(51) );
R( d, e, a, b, c, F3, K3, M(52) );
R( c, d, e, a, b, F3, K3, M(53) );
R( b, c, d, e, a, F3, K3, M(54) );
R( a, b, c, d, e, F3, K3, M(55) );
R( e, a, b, c, d, F3, K3, M(56) );
R( d, e, a, b, c, F3, K3, M(57) );
R( c, d, e, a, b, F3, K3, M(58) );
R( b, c, d, e, a, F3, K3, M(59) );
R( a, b, c, d, e, F4, K4, M(60) );
R( e, a, b, c, d, F4, K4, M(61) );
R( d, e, a, b, c, F4, K4, M(62) );
R( c, d, e, a, b, F4, K4, M(63) );
R( b, c, d, e, a, F4, K4, M(64) );
R( a, b, c, d, e, F4, K4, M(65) );
R( e, a, b, c, d, F4, K4, M(66) );
R( d, e, a, b, c, F4, K4, M(67) );
R( c, d, e, a, b, F4, K4, M(68) );
R( b, c, d, e, a, F4, K4, M(69) );
R( a, b, c, d, e, F4, K4, M(70) );
R( e, a, b, c, d, F4, K4, M(71) );
R( d, e, a, b, c, F4, K4, M(72) );
R( c, d, e, a, b, F4, K4, M(73) );
R( b, c, d, e, a, F4, K4, M(74) );
R( a, b, c, d, e, F4, K4, M(75) );
R( e, a, b, c, d, F4, K4, M(76) );
R( d, e, a, b, c, F4, K4, M(77) );
R( c, d, e, a, b, F4, K4, M(78) );
R( b, c, d, e, a, F4, K4, M(79) );
/* Update the chaining variables. */
hd->h0 += a;
hd->h1 += b;
hd->h2 += c;
hd->h3 += d;
hd->h4 += e;
- return /* burn_stack */ 88+4*sizeof(void*);
-}
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int
-_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int
-_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_BMI2
-unsigned int
-_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA1_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_BMI2
- if (hd->use_bmi2)
- {
- burn = _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
- {
- burn = _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-#ifdef USE_NEON
- if (hd->use_neon)
- {
- burn = _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 88+4*sizeof(void*);
}
/*
* Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE
 * which must have a length of 64 bytes. BLOCKOF64BYTE must be 32-bit
* aligned. Updates the 20 bytes in BLOCKOF64BYTE with its mixed
* content. Returns the number of bytes which should be burned on the
* stack. You need to use _gcry_sha1_mixblock_init to initialize the
* context.
* WARNING: This is a special purpose function for exclusive use by
* random-csprng.c.
*/
unsigned int
_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte)
{
u32 *p = blockof64byte;
unsigned int nburn;
- nburn = transform (hd, blockof64byte, 1);
+ nburn = (*hd->bctx.bwrite) (hd, blockof64byte, 1);
p[0] = hd->h0;
p[1] = hd->h1;
p[2] = hd->h2;
p[3] = hd->h3;
p[4] = hd->h4;
return nburn;
}
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
 * handle will destroy the returned buffer.
* Returns: 20 bytes representing the digest.
*/
static void
sha1_final(void *context)
{
SHA1_CONTEXT *hd = context;
u32 t, th, msb, lsb;
unsigned char *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if( (lsb += hd->bctx.count) < t )
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
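/* [Editor's note, not part of the patch.] At this point msb:lsb holds the
 * total message length in bits as a 64-bit value; it is appended big-endian
 * at offsets 56 and 60 of the final block below, as the SHA-1 padding rule
 * requires. */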
if( hd->bctx.count < 56 ) /* enough room */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while( hd->bctx.count < 56 )
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else /* need one extra block */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while( hd->bctx.count < 64 )
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write(hd, NULL, 0); /* flush */;
memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform( hd, hd->bctx.buf, 1 );
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
#undef X
}
static unsigned char *
sha1_read( void *context )
{
SHA1_CONTEXT *hd = context;
return hd->bctx.buf;
}
/****************
 * Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 20 bytes.
*/
void
_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
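/* [Editor's illustration, not part of the patch.] Minimal use of the shortcut
 * above; the expected output is the "abc" test vector quoted at the top of
 * this file:
 *
 *   unsigned char digest[20];
 *   _gcry_sha1_hash_buffer (digest, "abc", 3);
 *   // digest: A9 99 3E 36 47 06 81 6A BA 3E 25 71 78 50 C2 6C 9C D0 D8 9D
 */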
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha1 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abc", 3,
"\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E"
"\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D", 20);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE"
"\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1", 20);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 1,
NULL, 0,
"\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E"
"\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F", 20);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA1, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA1:
ec = selftests_sha1 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
{ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 };
static gcry_md_oid_spec_t oid_spec_sha1[] =
{
/* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */
{ "1.2.840.113549.1.1.5" },
/* iso.member-body.us.x9-57.x9cm.3 (dsaWithSha1)*/
{ "1.2.840.10040.4.3" },
/* from NIST's OIW (sha1) */
{ "1.3.14.3.2.26" },
/* from NIST OIW (sha-1WithRSAEncryption) */
{ "1.3.14.3.2.29" },
/* iso.member-body.us.ansi-x9-62.signatures.ecdsa-with-sha1 */
{ "1.2.840.10045.4.1" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha1 =
{
GCRY_MD_SHA1, {0, 1},
"SHA1", asn, DIM (asn), oid_spec_sha1, 20,
sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL,
_gcry_sha1_hash_buffer, _gcry_sha1_hash_buffers,
sizeof (SHA1_CONTEXT),
run_selftests
};
diff --git a/cipher/sha1.h b/cipher/sha1.h
index 93ce79b5..acf764ba 100644
--- a/cipher/sha1.h
+++ b/cipher/sha1.h
@@ -1,41 +1,35 @@
/* sha1.h - SHA-1 context definition
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GCRY_SHA1_H
#define GCRY_SHA1_H
#include "hash-common.h"
/* We need this here for direct use by random-csprng.c. */
typedef struct
{
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4;
- unsigned int use_ssse3:1;
- unsigned int use_avx:1;
- unsigned int use_bmi2:1;
- unsigned int use_shaext:1;
- unsigned int use_neon:1;
- unsigned int use_arm_ce:1;
} SHA1_CONTEXT;
void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd);
unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte);
#endif /*GCRY_SHA1_H*/
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 06959707..e82a9d90 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -1,788 +1,769 @@
/* sha256.c - SHA256 hash function
* Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors:
"abc"
SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7
SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525
SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1
"a" one million times
SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67
SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
typedef struct {
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4,h5,h6,h7;
+} SHA256_CONTEXT;
+
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_SHAEXT
- unsigned int use_shaext:1;
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha256_transform_intel_shaext(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
+}
#endif
+
#ifdef USE_ARM_CE
- unsigned int use_arm_ce:1;
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+ const void *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+}
#endif
-} SHA256_CONTEXT;
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
static void
sha256_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x6a09e667;
hd->h1 = 0xbb67ae85;
hd->h2 = 0x3c6ef372;
hd->h3 = 0xa54ff53a;
hd->h4 = 0x510e527f;
hd->h5 = 0x9b05688c;
hd->h6 = 0x1f83d9ab;
hd->h7 = 0x5be0cd19;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
static void
sha224_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0xc1059ed8;
hd->h1 = 0x367cd507;
hd->h2 = 0x3070dd17;
hd->h3 = 0xf70e5939;
hd->h4 = 0xffc00b31;
hd->h5 = 0x68581511;
hd->h6 = 0x64f98fa7;
hd->h7 = 0xbefa4fa4;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
/*
Transform the message X which consists of 16 32-bit-words. See FIPS
180-2 for details. */
#define R(a,b,c,d,e,f,g,h,k,w) do \
{ \
t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \
t2 = Sum0((a)) + Maj((a),(b),(c)); \
d += t1; \
h = t1 + t2; \
} while (0)
/* (4.2) same as SHA-1's F1. */
#define Cho(x, y, z) (z ^ (x & (y ^ z)))
/* (4.3) same as SHA-1's F3 */
#define Maj(x, y, z) ((x & y) + (z & (x ^ y)))
/* (4.4) */
#define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22))
/* (4.5) */
#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
/* Message expansion */
#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */
#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */
#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
#define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \
+ w[(i-7) &0x0f] \
+ S0(w[(i-15)&0x0f]) \
+ w[(i-16)&0x0f] )
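/* [Editor's note, not part of the patch.] Mapping to FIPS 180-2: R() computes
 * T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t] and T2 = Sigma0(a) + Maj(a,b,c),
 * updating only d += T1 and h = T1 + T2; the remaining working variables are
 * rotated by permuting the macro arguments at the call sites. Note that Maj()
 * is written as (x & y) + (z & (x ^ y)) rather than with '|'; the two terms
 * can never have a bit set in common, so '+' and '|' give the same result. */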
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA256_CONTEXT *hd = ctx;
static const u32 K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
- u32 a,b,c,d,e,f,g,h,t1,t2;
- u32 w[16];
-
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- R(a, b, c, d, e, f, g, h, K[0], I(0));
- R(h, a, b, c, d, e, f, g, K[1], I(1));
- R(g, h, a, b, c, d, e, f, K[2], I(2));
- R(f, g, h, a, b, c, d, e, K[3], I(3));
- R(e, f, g, h, a, b, c, d, K[4], I(4));
- R(d, e, f, g, h, a, b, c, K[5], I(5));
- R(c, d, e, f, g, h, a, b, K[6], I(6));
- R(b, c, d, e, f, g, h, a, K[7], I(7));
- R(a, b, c, d, e, f, g, h, K[8], I(8));
- R(h, a, b, c, d, e, f, g, K[9], I(9));
- R(g, h, a, b, c, d, e, f, K[10], I(10));
- R(f, g, h, a, b, c, d, e, K[11], I(11));
- R(e, f, g, h, a, b, c, d, K[12], I(12));
- R(d, e, f, g, h, a, b, c, K[13], I(13));
- R(c, d, e, f, g, h, a, b, K[14], I(14));
- R(b, c, d, e, f, g, h, a, K[15], I(15));
-
- R(a, b, c, d, e, f, g, h, K[16], W(16));
- R(h, a, b, c, d, e, f, g, K[17], W(17));
- R(g, h, a, b, c, d, e, f, K[18], W(18));
- R(f, g, h, a, b, c, d, e, K[19], W(19));
- R(e, f, g, h, a, b, c, d, K[20], W(20));
- R(d, e, f, g, h, a, b, c, K[21], W(21));
- R(c, d, e, f, g, h, a, b, K[22], W(22));
- R(b, c, d, e, f, g, h, a, K[23], W(23));
- R(a, b, c, d, e, f, g, h, K[24], W(24));
- R(h, a, b, c, d, e, f, g, K[25], W(25));
- R(g, h, a, b, c, d, e, f, K[26], W(26));
- R(f, g, h, a, b, c, d, e, K[27], W(27));
- R(e, f, g, h, a, b, c, d, K[28], W(28));
- R(d, e, f, g, h, a, b, c, K[29], W(29));
- R(c, d, e, f, g, h, a, b, K[30], W(30));
- R(b, c, d, e, f, g, h, a, K[31], W(31));
-
- R(a, b, c, d, e, f, g, h, K[32], W(32));
- R(h, a, b, c, d, e, f, g, K[33], W(33));
- R(g, h, a, b, c, d, e, f, K[34], W(34));
- R(f, g, h, a, b, c, d, e, K[35], W(35));
- R(e, f, g, h, a, b, c, d, K[36], W(36));
- R(d, e, f, g, h, a, b, c, K[37], W(37));
- R(c, d, e, f, g, h, a, b, K[38], W(38));
- R(b, c, d, e, f, g, h, a, K[39], W(39));
- R(a, b, c, d, e, f, g, h, K[40], W(40));
- R(h, a, b, c, d, e, f, g, K[41], W(41));
- R(g, h, a, b, c, d, e, f, K[42], W(42));
- R(f, g, h, a, b, c, d, e, K[43], W(43));
- R(e, f, g, h, a, b, c, d, K[44], W(44));
- R(d, e, f, g, h, a, b, c, K[45], W(45));
- R(c, d, e, f, g, h, a, b, K[46], W(46));
- R(b, c, d, e, f, g, h, a, K[47], W(47));
-
- R(a, b, c, d, e, f, g, h, K[48], W(48));
- R(h, a, b, c, d, e, f, g, K[49], W(49));
- R(g, h, a, b, c, d, e, f, K[50], W(50));
- R(f, g, h, a, b, c, d, e, K[51], W(51));
- R(e, f, g, h, a, b, c, d, K[52], W(52));
- R(d, e, f, g, h, a, b, c, K[53], W(53));
- R(c, d, e, f, g, h, a, b, K[54], W(54));
- R(b, c, d, e, f, g, h, a, K[55], W(55));
- R(a, b, c, d, e, f, g, h, K[56], W(56));
- R(h, a, b, c, d, e, f, g, K[57], W(57));
- R(g, h, a, b, c, d, e, f, K[58], W(58));
- R(f, g, h, a, b, c, d, e, K[59], W(59));
- R(e, f, g, h, a, b, c, d, K[60], W(60));
- R(d, e, f, g, h, a, b, c, K[61], W(61));
- R(c, d, e, f, g, h, a, b, K[62], W(62));
- R(b, c, d, e, f, g, h, a, K[63], W(63));
-
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /*burn_stack*/ 26*4+32;
-}
-#undef S0
-#undef S1
-#undef R
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha256_transform_intel_shaext(u32 state[8],
- const unsigned char *input_data,
- size_t num_blks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
- const void *input_data,
- size_t num_blks);
-#endif
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA256_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX2
- if (hd->use_avx2)
- {
- burn = _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
+ do
{
- burn = _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
+ u32 a,b,c,d,e,f,g,h,t1,t2;
+ u32 w[16];
+
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I(0));
+ R(h, a, b, c, d, e, f, g, K[1], I(1));
+ R(g, h, a, b, c, d, e, f, K[2], I(2));
+ R(f, g, h, a, b, c, d, e, K[3], I(3));
+ R(e, f, g, h, a, b, c, d, K[4], I(4));
+ R(d, e, f, g, h, a, b, c, K[5], I(5));
+ R(c, d, e, f, g, h, a, b, K[6], I(6));
+ R(b, c, d, e, f, g, h, a, K[7], I(7));
+ R(a, b, c, d, e, f, g, h, K[8], I(8));
+ R(h, a, b, c, d, e, f, g, K[9], I(9));
+ R(g, h, a, b, c, d, e, f, K[10], I(10));
+ R(f, g, h, a, b, c, d, e, K[11], I(11));
+ R(e, f, g, h, a, b, c, d, K[12], I(12));
+ R(d, e, f, g, h, a, b, c, K[13], I(13));
+ R(c, d, e, f, g, h, a, b, K[14], I(14));
+ R(b, c, d, e, f, g, h, a, K[15], I(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 26*4 + 32 + 3 * sizeof(void*);
}
+#undef S0
+#undef S1
+#undef R
+
/*
The routine finally terminates the computation and returns the
digest. The handle is prepared for a new cycle, but adding bytes
 to the handle will destroy the returned buffer. Returns: 32
 bytes with the message digest. */
static void
sha256_final(void *context)
{
SHA256_CONTEXT *hd = context;
u32 t, th, msb, lsb;
byte *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
if (hd->bctx.count < 56)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 56)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 64)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
memset (hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform (hd, hd->bctx.buf, 1);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
X(5);
X(6);
X(7);
#undef X
}
static byte *
sha256_read (void *context)
{
SHA256_CONTEXT *hd = context;
return hd->bctx.buf;
}
/* Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 32 bytes. */
void
_gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 28 bytes. */
static void
_gcry_sha224_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha224 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abc", 3,
"\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3"
"\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50"
"\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 1,
NULL, 0,
"\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b"
"\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA224, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha256 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abc", 3,
"\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23"
"\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39"
"\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1",
32);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 1,
NULL, 0,
"\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67"
"\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0",
32);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA256, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA224:
ec = selftests_sha224 (extended, report);
break;
case GCRY_MD_SHA256:
ec = selftests_sha256 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
{ 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48,
0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04,
0x1C
};
static gcry_md_oid_spec_t oid_spec_sha224[] =
{
/* From RFC3874, Section 4 */
{ "2.16.840.1.101.3.4.2.4" },
{ NULL },
};
static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */
{ 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
0x00, 0x04, 0x20 };
static gcry_md_oid_spec_t oid_spec_sha256[] =
{
/* According to the OpenPGP draft rfc2440-bis06 */
{ "2.16.840.1.101.3.4.2.1" },
/* PKCS#1 sha256WithRSAEncryption */
{ "1.2.840.113549.1.1.11" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha224 =
{
GCRY_MD_SHA224, {0, 1},
"SHA224", asn224, DIM (asn224), oid_spec_sha224, 28,
sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha224_hash_buffer, _gcry_sha224_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha256 =
{
GCRY_MD_SHA256, {0, 1},
"SHA256", asn256, DIM (asn256), oid_spec_sha256, 32,
sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha256_hash_buffer, _gcry_sha256_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S
index a9d12724..6596f2cd 100644
--- a/cipher/sha512-armv7-neon.S
+++ b/cipher/sha512-armv7-neon.S
@@ -1,449 +1,450 @@
/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
*
* Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_NEON)
.text
.syntax unified
.fpu neon
.arm
/* structure of SHA512_CONTEXT */
#define hd_a 0
#define hd_b ((hd_a) + 8)
#define hd_c ((hd_b) + 8)
#define hd_d ((hd_c) + 8)
#define hd_e ((hd_d) + 8)
#define hd_f ((hd_e) + 8)
#define hd_g ((hd_f) + 8)
/* register macros */
#define RK %r2
#define RA d0
#define RB d1
#define RC d2
#define RD d3
#define RE d4
#define RF d5
#define RG d6
#define RH d7
#define RT0 d8
#define RT1 d9
#define RT2 d10
#define RT3 d11
#define RT4 d12
#define RT5 d13
#define RT6 d14
#define RT7 d15
#define RT01q q4
#define RT23q q5
#define RT45q q6
#define RT67q q7
#define RW0 d16
#define RW1 d17
#define RW2 d18
#define RW3 d19
#define RW4 d20
#define RW5 d21
#define RW6 d22
#define RW7 d23
#define RW8 d24
#define RW9 d25
#define RW10 d26
#define RW11 d27
#define RW12 d28
#define RW13 d29
#define RW14 d30
#define RW15 d31
#define RW01q q8
#define RW23q q9
#define RW45q q10
#define RW67q q11
#define RW89q q12
#define RW1011q q13
#define RW1213q q14
#define RW1415q q15
/***********************************************************************
* ARM assembly implementation of sha512 transform
***********************************************************************/
#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3; \
\
/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
\
/**** S0(w[1:2]) */ \
\
/* w[0:1] += w[9:10] */ \
/* RT23q = rw1:rw2 */ \
vext.u64 RT23q, rw01q, rw23q, #1; \
vadd.u64 rw0, rw9; \
vadd.u64 rg, rg, RT0; \
vadd.u64 rw1, rw10;\
vadd.u64 rg, rg, RT1; /* g+=t1; */ \
\
vshr.u64 RT45q, RT23q, #1; \
vshl.u64 RT67q, RT23q, #64 - 1; \
vshr.u64 RT01q, RT23q, #8; \
veor.u64 RT45q, RT45q, RT67q; \
vshl.u64 RT67q, RT23q, #64 - 8; \
veor.u64 RT45q, RT45q, RT01q; \
vshr.u64 RT01q, RT23q, #7; \
veor.u64 RT45q, RT45q, RT67q; \
\
/**** S1(w[14:15]) */ \
vshr.u64 RT23q, rw1415q, #6; \
veor.u64 RT01q, RT01q, RT45q; \
vshr.u64 RT45q, rw1415q, #19; \
vshl.u64 RT67q, rw1415q, #64 - 19; \
veor.u64 RT23q, RT23q, RT45q; \
vshr.u64 RT45q, rw1415q, #61; \
veor.u64 RT23q, RT23q, RT67q; \
vshl.u64 RT67q, rw1415q, #64 - 61; \
veor.u64 RT23q, RT23q, RT45q; \
vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
veor.u64 RT01q, RT23q, RT67q;
#define vadd_RT01q(rw01q) \
/* w[0:1] += S(w[14:15]) */ \
vadd.u64 rw01q, RT01q;
#define dummy(_) /*_*/
#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, interleave_op1, arg1, interleave_op2, arg2) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op1(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
interleave_op2(arg2); \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3;
#define vadd_rg_RT0(rg) \
vadd.u64 rg, rg, RT0;
#define vadd_rg_RT1(rg) \
vadd.u64 rg, rg, RT1; /* g+=t1; */
.align 3
.globl _gcry_sha512_transform_armv7_neon
.type _gcry_sha512_transform_armv7_neon,%function;
_gcry_sha512_transform_armv7_neon:
/* Input:
* %r0: SHA512_CONTEXT
* %r1: data
* %r2: u64 k[] constants
* %r3: nblks
*/
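/* Editorial note (not part of the original patch): the function now returns
 * an unsigned int burn-stack value in %r0; it keeps no sensitive data on the
 * stack, so %r0 is simply cleared by the "eor %r0, %r0" added before
 * "pop {%pc}" below. */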
push {%lr};
mov %lr, #0;
/* Load context to d0-d7 */
vld1.64 {RA-RD}, [%r0]!;
vld1.64 {RE-RH}, [%r0];
sub %r0, #(4*8);
/* Load input to w[16], d16-d31 */
/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
vld1.64 {RW0-RW3}, [%r1]!;
vld1.64 {RW4-RW7}, [%r1]!;
vld1.64 {RW8-RW11}, [%r1]!;
vld1.64 {RW12-RW15}, [%r1]!;
#ifdef __ARMEL__
/* byteswap */
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
/* EABI says that d8-d15 must be preserved by callee. */
vpush {RT0-RT7};
.Loop:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, dummy, _);
b .Lenter_rounds;
.Loop_rounds:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
.Lenter_rounds:
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
add %lr, #16;
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
cmp %lr, #64;
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
bne .Loop_rounds;
subs %r3, #1;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
beq .Lhandle_tail;
vld1.64 {RW0-RW3}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
#endif
vld1.64 {RW4-RW7}, [%r1]!;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
#ifdef __ARMEL__
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
#endif
vld1.64 {RW8-RW11}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
#endif
vld1.64 {RW12-RW15}, [%r1]!;
vadd_rg_RT0(RA);
vadd_rg_RT1(RA);
/* Load context */
vld1.64 {RT0-RT3}, [%r0]!;
vld1.64 {RT4-RT7}, [%r0];
sub %r0, #(4*8);
#ifdef __ARMEL__
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
vadd.u64 RA, RT0;
vadd.u64 RB, RT1;
vadd.u64 RC, RT2;
vadd.u64 RD, RT3;
vadd.u64 RE, RT4;
vadd.u64 RF, RT5;
vadd.u64 RG, RT6;
vadd.u64 RH, RT7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
sub RK, $(8*80);
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
mov %lr, #0;
sub %r0, #(4*8);
b .Loop;
.ltorg
.Lhandle_tail:
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
/* Load context to d16-d23 */
vld1.64 {RW0-RW3}, [%r0]!;
vadd_rg_RT0(RA);
vld1.64 {RW4-RW7}, [%r0];
vadd_rg_RT1(RA);
sub %r0, #(4*8);
vadd.u64 RA, RW0;
vadd.u64 RB, RW1;
vadd.u64 RC, RW2;
vadd.u64 RD, RW3;
vadd.u64 RE, RW4;
vadd.u64 RF, RW5;
vadd.u64 RG, RW6;
vadd.u64 RH, RW7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
/* Clear used registers */
/* d16-d31 */
veor.u64 RW01q, RW01q;
veor.u64 RW23q, RW23q;
veor.u64 RW45q, RW45q;
veor.u64 RW67q, RW67q;
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
veor.u64 RW89q, RW89q;
veor.u64 RW1011q, RW1011q;
veor.u64 RW1213q, RW1213q;
veor.u64 RW1415q, RW1415q;
/* d8-d15 */
vpop {RT0-RT7};
/* d0-d7 (q0-q3) */
veor.u64 %q0, %q0;
veor.u64 %q1, %q1;
veor.u64 %q2, %q2;
veor.u64 %q3, %q3;
+ eor %r0, %r0;
pop {%pc};
.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon;
#endif
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 9405de80..721f3405 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -1,991 +1,951 @@
/* sha512.c - SHA384 and SHA512 hash functions
* Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors from FIPS-180-2:
*
* "abc"
* 384:
* CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163
* 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7
* 512:
* DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A
* 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F
*
* "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
* 384:
* 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2
* 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039
* 512:
* 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018
* 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909
*
* "a" x 1000000
* 384:
* 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852
* 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985
* 512:
* E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB
* DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B
*/
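/* Illustrative check (editorial sketch, not part of the original file): the
 * "abc" SHA-512 vector above can be reproduced through the public libgcrypt
 * API, for example:
 *
 *   #include <stdio.h>
 *   #include <gcrypt.h>
 *
 *   int main (void)
 *   {
 *     unsigned char digest[64];
 *     int i;
 *
 *     (void) gcry_check_version (NULL);
 *     gcry_md_hash_buffer (GCRY_MD_SHA512, digest, "abc", 3);
 *     for (i = 0; i < 64; i++)
 *       printf ("%02X", digest[i]);
 *     printf ("\n");
 *     return 0;
 *   }
 *
 * The printed value should match the 512-bit "abc" digest listed above
 * (DDAF35A1 ... A54CA49F). */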
#include <config.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
#undef USE_ARM_NEON_ASM
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_ARM_NEON_ASM 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/
/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
#undef USE_ARM_ASM
#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
# define USE_ARM_ASM 1
#endif
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
typedef struct
{
u64 h0, h1, h2, h3, h4, h5, h6, h7;
} SHA512_STATE;
typedef struct
{
gcry_md_block_ctx_t bctx;
SHA512_STATE state;
+} SHA512_CONTEXT;
+
+
+static const u64 k[] =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
+
+/* AMD64 assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
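+/* Editorial note (not part of the original patch): on Win64 the extra stack
+ * accounts for saving the ten registers XMM6..XMM15 (10 * 16 bytes) plus
+ * four pointer-sized slots used by the ABI-conversion call frame. */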
+
+
#ifdef USE_ARM_NEON_ASM
- unsigned int use_neon:1;
+unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+
+static unsigned int
+do_sha512_transform_armv7_neon(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
#endif
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_ssse3 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx2 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
-} SHA512_CONTEXT;
+
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks);
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = context;
+ return _gcry_sha512_transform_arm (&hd->state, data, k, nblks);
+}
+#else
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks);
+#endif
+
static void
sha512_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
+ (void)k;
hd->h0 = U64_C(0x6a09e667f3bcc908);
hd->h1 = U64_C(0xbb67ae8584caa73b);
hd->h2 = U64_C(0x3c6ef372fe94f82b);
hd->h3 = U64_C(0xa54ff53a5f1d36f1);
hd->h4 = U64_C(0x510e527fade682d1);
hd->h5 = U64_C(0x9b05688c2b3e6c1f);
hd->h6 = U64_C(0x1f83d9abfb41bd6b);
hd->h7 = U64_C(0x5be0cd19137e2179);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
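/* Editorial note (not in the original patch): whichever implementation was
 * selected above is later invoked through the generic block machinery, e.g.
 * as "(*hd->bctx.bwrite) (hd, hd->bctx.buf, 1)" in sha512_final below. */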
static void
sha384_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
hd->h1 = U64_C(0x629a292a367cd507);
hd->h2 = U64_C(0x9159015a3070dd17);
hd->h3 = U64_C(0x152fecd8f70e5939);
hd->h4 = U64_C(0x67332667ffc00b31);
hd->h5 = U64_C(0x8eb44a8768581511);
hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
hd->h7 = U64_C(0x47b5481dbefa4fa4);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
-static const u64 k[] =
- {
- U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
- U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
- U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
- U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
- U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
- U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
- U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
- U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
- U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
- U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
- U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
- U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
- U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
- U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
- U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
- U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
- U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
- U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
- U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
- U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
- U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
- U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
- U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
- U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
- U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
- U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
- U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
- U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
- U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
- U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
- U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
- U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
- U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
- U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
- U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
- U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
- U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
- U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
- U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
- U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
- };
-
#ifndef USE_ARM_ASM
static inline u64
ROTR (u64 x, u64 n)
{
return ((x >> n) | (x << (64 - n)));
}
static inline u64
Ch (u64 x, u64 y, u64 z)
{
return ((x & y) ^ ( ~x & z));
}
static inline u64
Maj (u64 x, u64 y, u64 z)
{
return ((x & y) ^ (x & z) ^ (y & z));
}
static inline u64
Sum0 (u64 x)
{
return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
}
static inline u64
Sum1 (u64 x)
{
return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
}
/****************
* Transform the message W which consists of 16 64-bit-words
*/
static unsigned int
-transform_blk (SHA512_STATE *hd, const unsigned char *data)
-{
- u64 a, b, c, d, e, f, g, h;
- u64 w[16];
- int t;
-
- /* get values from the chaining vars */
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- for ( t = 0; t < 16; t++ )
- w[t] = buf_get_be64(data + t * 8);
-
-#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
-#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
-
- for (t = 0; t < 80 - 16; )
- {
- u64 t1, t2;
-
- /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
- with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
- initialized to 0,1,2,3...255,0,... and 1000 iterations:
-
- Not unrolled with macros: 440ms
- Unrolled with macros: 350ms
- Unrolled with inline: 330ms
- */
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]);
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- for (; t < 80; )
- {
- u64 t1, t2;
-
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- /* Update chaining vars. */
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) +
- 3 * sizeof(void*);
-}
-#endif /*!USE_ARM_ASM*/
-
-/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_ARM_NEON_ASM
-void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_ARM_ASM
-unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-
-static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks)
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
{
SHA512_CONTEXT *ctx = context;
- unsigned int burn;
-
-#ifdef USE_AVX2
- if (ctx->use_avx2)
- return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_AVX
- if (ctx->use_avx)
- return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_SSSE3
- if (ctx->use_ssse3)
- return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
+ SHA512_STATE *hd = &ctx->state;
-#ifdef USE_ARM_NEON_ASM
- if (ctx->use_neon)
+ do
{
- _gcry_sha512_transform_armv7_neon (&ctx->state, data, k, nblks);
+ u64 a, b, c, d, e, f, g, h;
+ u64 w[16];
+ int t;
+
+ /* get values from the chaining vars */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ for ( t = 0; t < 16; t++ )
+ w[t] = buf_get_be64(data + t * 8);
- /* _gcry_sha512_transform_armv7_neon does not store sensitive data
- * to stack. */
- return /* no burn_stack */ 0;
- }
-#endif
+#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+ for (t = 0; t < 80 - 16; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ for (; t < 80; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ /* Update chaining vars. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
-#ifdef USE_ARM_ASM
- burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
-#else
- do
- {
- burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
data += 128;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-#endif
-
- return burn;
+ return (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
}
+#endif /*!USE_ARM_ASM*/
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
* handle will destroy the returned buffer.
* Returns: 64 bytes representing the digest. When used for sha384,
* we take the leftmost 48 of those bytes.
*/
static void
sha512_final (void *context)
{
SHA512_CONTEXT *hd = context;
unsigned int stack_burn_depth;
u64 t, th, msb, lsb;
byte *p;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
t = hd->bctx.nblocks;
/* if (sizeof t == sizeof hd->bctx.nblocks) */
th = hd->bctx.nblocks_high;
/* else */
/* th = hd->bctx.nblocks >> 64; In case we ever use u128 */
/* multiply by 128 to make a byte count */
lsb = t << 7;
msb = (th << 7) | (t >> 57);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 61;
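/* Editorial example (not in the original source): for the 3-byte message
 * "abc", nblocks == 0 and count == 3, so lsb ends up as 3 bytes and, after
 * the shift by 3 above, as 24 bits with msb == 0; these two words form the
 * 128-bit big-endian bit count stored at buf+112 and buf+120 below. */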
if (hd->bctx.count < 112)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 112)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 128)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */
}
/* append the 128 bit count */
buf_put_be64(hd->bctx.buf + 112, msb);
buf_put_be64(hd->bctx.buf + 120, lsb);
- stack_burn_depth = transform (hd, hd->bctx.buf, 1);
+ stack_burn_depth = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (stack_burn_depth);
p = hd->bctx.buf;
#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0)
X (0);
X (1);
X (2);
X (3);
X (4);
X (5);
/* Note that these last two chunks are included even for SHA384.
We just ignore them. */
X (6);
X (7);
#undef X
}
static byte *
sha512_read (void *context)
{
SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context;
return hd->bctx.buf;
}
/* Shortcut function which puts the hash value of the supplied buffer
 * into outbuf, which must have a size of 64 bytes. */
void
_gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
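/* Illustrative usage (editorial sketch, not part of the original file):
 * hashing two fragments with the multi-buffer variant above:
 *
 *   gcry_buffer_t iov[2] = { { .data = "abc", .len = 3 },
 *                            { .data = "def", .len = 3 } };
 *   unsigned char digest[64];
 *   _gcry_sha512_hash_buffers (digest, iov, 2);
 *
 * which yields the same digest as hashing the 6-byte string "abcdef". */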
/* Shortcut function which puts the hash value of the supplied buffer
 * into outbuf, which must have a size of 48 bytes. */
static void
_gcry_sha384_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha384 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abc", 3,
"\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07"
"\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed"
"\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47"
"\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12"
"\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39",
48);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 1,
NULL, 0,
"\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C"
"\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B"
"\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85",
48);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA384, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha512 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abc", 3,
"\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31"
"\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A"
"\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD"
"\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F"
"\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18"
"\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A"
"\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09",
64);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 1,
NULL, 0,
"\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63"
"\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB"
"\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B"
"\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B",
64);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA512, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA384:
ec = selftests_sha384 (extended, report);
break;
case GCRY_MD_SHA512:
ec = selftests_sha512 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */
{
0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05,
0x00, 0x04, 0x40
};
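/* Editorial note (not in the original source): the bytes above are the
 * DER-encoded DigestInfo prefix used for PKCS#1 signatures: SEQUENCE (0x30,
 * length 0x51), AlgorithmIdentifier containing the OID 2.16.840.1.101.3.4.2.3
 * with NULL parameters (0x05 0x00), followed by an OCTET STRING header
 * (0x04 0x40) announcing the 64-byte digest that gets appended. */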
static gcry_md_oid_spec_t oid_spec_sha512[] =
{
{ "2.16.840.1.101.3.4.2.3" },
/* PKCS#1 sha512WithRSAEncryption */
{ "1.2.840.113549.1.1.13" },
{ NULL }
};
gcry_md_spec_t _gcry_digest_spec_sha512 =
{
GCRY_MD_SHA512, {0, 1},
"SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64,
sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha512_hash_buffer, _gcry_sha512_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};
static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */
{
0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05,
0x00, 0x04, 0x30
};
static gcry_md_oid_spec_t oid_spec_sha384[] =
{
{ "2.16.840.1.101.3.4.2.2" },
/* PKCS#1 sha384WithRSAEncryption */
{ "1.2.840.113549.1.1.12" },
/* SHA384WithECDSA: RFC 7427 (A.3.3.) */
{ "1.2.840.10045.4.3.3" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha384 =
{
GCRY_MD_SHA384, {0, 1},
"SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48,
sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha384_hash_buffer, _gcry_sha384_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};