diff --git a/cipher/sha1.c b/cipher/sha1.c
index e50262ff..76c486c7 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -1,671 +1,664 @@
/* sha1.c - SHA1 hash function
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors:
*
* "abc"
* A999 3E36 4706 816A BA3E 2571 7850 C26C 9CD0 D89D
*
* "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
* 8498 3E44 1C3B D26E BAAE 4AA1 F951 29E5 E546 70F1
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STDINT_H
# include <stdint.h>
#endif
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "sha1.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
#undef USE_BMI2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_BMI2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_NEON indicates whether to enable ARM NEON assembly code. */
#undef USE_NEON
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_NEON 1
# endif
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
/* A macro to test whether P is properly aligned for a u32 type.
Note that config.h provides a suitable replacement for uintptr_t if
it does not exist in stdint.h. */
/* #if __GNUC__ >= 2 */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % __alignof__ (u32))) */
/* #else */
/* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */
/* #endif */
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
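/* [Editor's note, not part of the patch; a sketch of why ASM_EXTRA_STACK grew.]
 * The old transform() dispatcher added 4 * sizeof(void*) to the burn value on
 * every call; this patch folds that term into ASM_EXTRA_STACK itself
 * (10 * 16 bytes for the XMM6-XMM15 spill area plus sizeof(void *) * 4), so
 * each per-implementation wrapper below can simply return
 *
 *   asm_burn + ASM_EXTRA_STACK
 *
 * ("asm_burn" here is just an illustrative name for the assembly routine's
 * return value) and _gcry_burn_stack() still wipes the Win64 ABI-conversion
 * frame created by the sysv_abi prologue. */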
+
+
+#ifdef USE_SSSE3
+unsigned int
+_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+ size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+ + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+ size_t nblks);
+
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_sha1_transform_intel_shaext (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int
+_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
+ size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA1_CONTEXT *hd = ctx;
+ return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *c, const unsigned char *data, size_t nblks);
static void
sha1_init (void *context, unsigned int flags)
{
SHA1_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x67452301;
hd->h1 = 0xefcdab89;
hd->h2 = 0x98badcfe;
hd->h3 = 0x10325476;
hd->h4 = 0xc3d2e1f0;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha1_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx;
#endif
#ifdef USE_BMI2
- hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha1_transform_amd64_avx_bmi2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha1_transform_intel_shaext;
#endif
#ifdef USE_NEON
- hd->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv7_neon;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA1) != 0;
+ if ((features & HWF_ARM_SHA1) != 0)
+ hd->bctx.bwrite = do_sha1_transform_armv8_ce;
#endif
+
(void)features;
}
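/* [Editor's note, not part of the patch; illustrative view of the new
 * dispatch.] The CPU-feature decision is now taken once in sha1_init() and
 * stored in hd->bctx.bwrite; every subsequent block write (presumably via
 * _gcry_md_block_write in hash-common.c, and sha1_final() below) just calls
 * the selected hook,
 *
 *   unsigned int burn = (*hd->bctx.bwrite) (hd, data, nblks);
 *   _gcry_burn_stack (burn);
 *
 * instead of re-checking hd->use_ssse3 / hd->use_avx / ... on every call as
 * the removed transform() did. */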
/*
* Initialize the context HD. This is used to prepare the use of
* _gcry_sha1_mixblock. WARNING: This is a special purpose function
* for exclusive use by random-csprng.c.
*/
void
_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd)
{
sha1_init (hd, 0);
}
/* Round function macros. */
#define K1 0x5A827999L
#define K2 0x6ED9EBA1L
#define K3 0x8F1BBCDCL
#define K4 0xCA62C1D6L
#define F1(x,y,z) ( z ^ ( x & ( y ^ z ) ) )
#define F2(x,y,z) ( x ^ y ^ z )
#define F3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) )
#define F4(x,y,z) ( x ^ y ^ z )
#define M(i) ( tm = x[ i &0x0f] \
^ x[(i-14)&0x0f] \
^ x[(i-8) &0x0f] \
^ x[(i-3) &0x0f], \
(x[i&0x0f] = rol(tm, 1)))
#define R(a,b,c,d,e,f,k,m) do { e += rol( a, 5 ) \
+ f( b, c, d ) \
+ k \
+ m; \
b = rol( b, 30 ); \
} while(0)
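/* [Editor's note, not part of the patch.] How these macros map to the SHA-1
 * specification: x[] is a 16-word circular buffer, so when M(i) runs,
 * x[i & 0x0f] still holds W[i-16]; the macro therefore computes the standard
 * message expansion
 *
 *   W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 *
 * R(a,b,c,d,e,f,k,m) performs one round, e += rol(a,5) + f(b,c,d) + k + m
 * followed by b = rol(b,30); the rotation of the working registers is done by
 * permuting the macro arguments at the call sites below. */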
-
-#ifdef USE_NEON
-unsigned int
-_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int
-_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
/*
* Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
*/
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA1_CONTEXT *hd = ctx;
- const u32 *idata = (const void *)data;
- register u32 a, b, c, d, e; /* Local copies of the chaining variables. */
- register u32 tm; /* Helper. */
- u32 x[16]; /* The array we work on. */
+
+ do
+ {
+ const u32 *idata = (const void *)data;
+ u32 a, b, c, d, e; /* Local copies of the chaining variables. */
+ u32 tm; /* Helper. */
+ u32 x[16]; /* The array we work on. */
#define I(i) (x[i] = buf_get_be32(idata + i))
/* Get the values of the chaining variables. */
a = hd->h0;
b = hd->h1;
c = hd->h2;
d = hd->h3;
e = hd->h4;
/* Transform. */
R( a, b, c, d, e, F1, K1, I( 0) );
R( e, a, b, c, d, F1, K1, I( 1) );
R( d, e, a, b, c, F1, K1, I( 2) );
R( c, d, e, a, b, F1, K1, I( 3) );
R( b, c, d, e, a, F1, K1, I( 4) );
R( a, b, c, d, e, F1, K1, I( 5) );
R( e, a, b, c, d, F1, K1, I( 6) );
R( d, e, a, b, c, F1, K1, I( 7) );
R( c, d, e, a, b, F1, K1, I( 8) );
R( b, c, d, e, a, F1, K1, I( 9) );
R( a, b, c, d, e, F1, K1, I(10) );
R( e, a, b, c, d, F1, K1, I(11) );
R( d, e, a, b, c, F1, K1, I(12) );
R( c, d, e, a, b, F1, K1, I(13) );
R( b, c, d, e, a, F1, K1, I(14) );
R( a, b, c, d, e, F1, K1, I(15) );
R( e, a, b, c, d, F1, K1, M(16) );
R( d, e, a, b, c, F1, K1, M(17) );
R( c, d, e, a, b, F1, K1, M(18) );
R( b, c, d, e, a, F1, K1, M(19) );
R( a, b, c, d, e, F2, K2, M(20) );
R( e, a, b, c, d, F2, K2, M(21) );
R( d, e, a, b, c, F2, K2, M(22) );
R( c, d, e, a, b, F2, K2, M(23) );
R( b, c, d, e, a, F2, K2, M(24) );
R( a, b, c, d, e, F2, K2, M(25) );
R( e, a, b, c, d, F2, K2, M(26) );
R( d, e, a, b, c, F2, K2, M(27) );
R( c, d, e, a, b, F2, K2, M(28) );
R( b, c, d, e, a, F2, K2, M(29) );
R( a, b, c, d, e, F2, K2, M(30) );
R( e, a, b, c, d, F2, K2, M(31) );
R( d, e, a, b, c, F2, K2, M(32) );
R( c, d, e, a, b, F2, K2, M(33) );
R( b, c, d, e, a, F2, K2, M(34) );
R( a, b, c, d, e, F2, K2, M(35) );
R( e, a, b, c, d, F2, K2, M(36) );
R( d, e, a, b, c, F2, K2, M(37) );
R( c, d, e, a, b, F2, K2, M(38) );
R( b, c, d, e, a, F2, K2, M(39) );
R( a, b, c, d, e, F3, K3, M(40) );
R( e, a, b, c, d, F3, K3, M(41) );
R( d, e, a, b, c, F3, K3, M(42) );
R( c, d, e, a, b, F3, K3, M(43) );
R( b, c, d, e, a, F3, K3, M(44) );
R( a, b, c, d, e, F3, K3, M(45) );
R( e, a, b, c, d, F3, K3, M(46) );
R( d, e, a, b, c, F3, K3, M(47) );
R( c, d, e, a, b, F3, K3, M(48) );
R( b, c, d, e, a, F3, K3, M(49) );
R( a, b, c, d, e, F3, K3, M(50) );
R( e, a, b, c, d, F3, K3, M(51) );
R( d, e, a, b, c, F3, K3, M(52) );
R( c, d, e, a, b, F3, K3, M(53) );
R( b, c, d, e, a, F3, K3, M(54) );
R( a, b, c, d, e, F3, K3, M(55) );
R( e, a, b, c, d, F3, K3, M(56) );
R( d, e, a, b, c, F3, K3, M(57) );
R( c, d, e, a, b, F3, K3, M(58) );
R( b, c, d, e, a, F3, K3, M(59) );
R( a, b, c, d, e, F4, K4, M(60) );
R( e, a, b, c, d, F4, K4, M(61) );
R( d, e, a, b, c, F4, K4, M(62) );
R( c, d, e, a, b, F4, K4, M(63) );
R( b, c, d, e, a, F4, K4, M(64) );
R( a, b, c, d, e, F4, K4, M(65) );
R( e, a, b, c, d, F4, K4, M(66) );
R( d, e, a, b, c, F4, K4, M(67) );
R( c, d, e, a, b, F4, K4, M(68) );
R( b, c, d, e, a, F4, K4, M(69) );
R( a, b, c, d, e, F4, K4, M(70) );
R( e, a, b, c, d, F4, K4, M(71) );
R( d, e, a, b, c, F4, K4, M(72) );
R( c, d, e, a, b, F4, K4, M(73) );
R( b, c, d, e, a, F4, K4, M(74) );
R( a, b, c, d, e, F4, K4, M(75) );
R( e, a, b, c, d, F4, K4, M(76) );
R( d, e, a, b, c, F4, K4, M(77) );
R( c, d, e, a, b, F4, K4, M(78) );
R( b, c, d, e, a, F4, K4, M(79) );
/* Update the chaining variables. */
hd->h0 += a;
hd->h1 += b;
hd->h2 += c;
hd->h3 += d;
hd->h4 += e;
- return /* burn_stack */ 88+4*sizeof(void*);
-}
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int
-_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int
-_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_BMI2
-unsigned int
-_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
- size_t nblks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
- size_t nblks);
-#endif
-
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA1_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_BMI2
- if (hd->use_bmi2)
- {
- burn = _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
- {
- burn = _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-#ifdef USE_NEON
- if (hd->use_neon)
- {
- burn = _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
-
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 88+4*sizeof(void*);
}
/*
* Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE
 * which must have a length of 64 bytes. BLOCKOF64BYTE must be 32-bit
* aligned. Updates the 20 bytes in BLOCKOF64BYTE with its mixed
* content. Returns the number of bytes which should be burned on the
* stack. You need to use _gcry_sha1_mixblock_init to initialize the
* context.
* WARNING: This is a special purpose function for exclusive use by
* random-csprng.c.
*/
unsigned int
_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte)
{
u32 *p = blockof64byte;
unsigned int nburn;
- nburn = transform (hd, blockof64byte, 1);
+ nburn = (*hd->bctx.bwrite) (hd, blockof64byte, 1);
p[0] = hd->h0;
p[1] = hd->h1;
p[2] = hd->h2;
p[3] = hd->h3;
p[4] = hd->h4;
return nburn;
}
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
 * handle will destroy the returned buffer.
* Returns: 20 bytes representing the digest.
*/
static void
sha1_final(void *context)
{
SHA1_CONTEXT *hd = context;
u32 t, th, msb, lsb;
unsigned char *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if( (lsb += hd->bctx.count) < t )
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
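/* [Editor's note, not part of the patch.] At this point msb:lsb holds the
 * total message length in bits as a 64-bit value; it is appended big-endian
 * at offsets 56 and 60 of the final block below, as the SHA-1 padding rule
 * requires. */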
if( hd->bctx.count < 56 ) /* enough room */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while( hd->bctx.count < 56 )
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else /* need one extra block */
{
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while( hd->bctx.count < 64 )
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write(hd, NULL, 0); /* flush */;
memset(hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform( hd, hd->bctx.buf, 1 );
+ burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
#undef X
}
static unsigned char *
sha1_read( void *context )
{
SHA1_CONTEXT *hd = context;
return hd->bctx.buf;
}
/****************
 * Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 20 bytes.
*/
void
_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
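/* [Editor's illustration, not part of the patch.] Minimal use of the shortcut
 * above; the expected output is the "abc" test vector quoted at the top of
 * this file:
 *
 *   unsigned char digest[20];
 *   _gcry_sha1_hash_buffer (digest, "abc", 3);
 *   // digest: A9 99 3E 36 47 06 81 6A BA 3E 25 71 78 50 C2 6C 9C D0 D8 9D
 */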
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha1_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA1_CONTEXT hd;
sha1_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha1_final (&hd);
memcpy (outbuf, hd.bctx.buf, 20);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha1 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abc", 3,
"\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E"
"\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D", 20);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE"
"\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1", 20);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA1, 1,
NULL, 0,
"\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E"
"\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F", 20);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA1, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA1:
ec = selftests_sha1 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
{ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 };
static gcry_md_oid_spec_t oid_spec_sha1[] =
{
/* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */
{ "1.2.840.113549.1.1.5" },
/* iso.member-body.us.x9-57.x9cm.3 (dsaWithSha1)*/
{ "1.2.840.10040.4.3" },
/* from NIST's OIW (sha1) */
{ "1.3.14.3.2.26" },
/* from NIST OIW (sha-1WithRSAEncryption) */
{ "1.3.14.3.2.29" },
/* iso.member-body.us.ansi-x9-62.signatures.ecdsa-with-sha1 */
{ "1.2.840.10045.4.1" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha1 =
{
GCRY_MD_SHA1, {0, 1},
"SHA1", asn, DIM (asn), oid_spec_sha1, 20,
sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL,
_gcry_sha1_hash_buffer, _gcry_sha1_hash_buffers,
sizeof (SHA1_CONTEXT),
run_selftests
};
diff --git a/cipher/sha1.h b/cipher/sha1.h
index 93ce79b5..acf764ba 100644
--- a/cipher/sha1.h
+++ b/cipher/sha1.h
@@ -1,41 +1,35 @@
/* sha1.h - SHA-1 context definition
* Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GCRY_SHA1_H
#define GCRY_SHA1_H
#include "hash-common.h"
/* We need this here for direct use by random-csprng.c. */
typedef struct
{
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4;
- unsigned int use_ssse3:1;
- unsigned int use_avx:1;
- unsigned int use_bmi2:1;
- unsigned int use_shaext:1;
- unsigned int use_neon:1;
- unsigned int use_arm_ce:1;
} SHA1_CONTEXT;
void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd);
unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte);
#endif /*GCRY_SHA1_H*/
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 06959707..e82a9d90 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -1,788 +1,769 @@
/* sha256.c - SHA256 hash function
* Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors:
"abc"
SHA224: 23097d22 3405d822 8642a477 bda255b3 2aadbce4 bda0b3f7 e36c9da7
SHA256: ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
SHA224: 75388b16 512776cc 5dba5da1 fd890150 b0c6455c b4f58b19 52522525
SHA256: 248d6a61 d20638b8 e5c02693 0c3e6039 a33ce459 64ff2167 f6ecedd4 19db06c1
"a" one million times
SHA224: 20794655 980c91d8 bbb4c1ea 97618a4b f03f4258 1948b2ee 4ee7ad67
SHA256: cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
#undef USE_SHAEXT
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
defined(HAVE_GCC_INLINE_ASM_SSE41) && \
defined(ENABLE_SHAEXT_SUPPORT)
# define USE_SHAEXT 1
#endif
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
* code. */
#undef USE_ARM_CE
#ifdef ENABLE_ARM_CRYPTO_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
# define USE_ARM_CE 1
# elif defined(__AARCH64EL__) \
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
# define USE_ARM_CE 1
# endif
#endif
typedef struct {
gcry_md_block_ctx_t bctx;
u32 h0,h1,h2,h3,h4,h5,h6,h7;
+} SHA256_CONTEXT;
+
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+ defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+ u32 state[8],
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_SHAEXT
- unsigned int use_shaext:1;
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha256_transform_intel_shaext(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
+}
#endif
+
#ifdef USE_ARM_CE
- unsigned int use_arm_ce:1;
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+ const void *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
+}
#endif
-} SHA256_CONTEXT;
static unsigned int
-transform (void *c, const unsigned char *data, size_t nblks);
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
static void
sha256_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0x6a09e667;
hd->h1 = 0xbb67ae85;
hd->h2 = 0x3c6ef372;
hd->h3 = 0xa54ff53a;
hd->h4 = 0x510e527f;
hd->h5 = 0x9b05688c;
hd->h6 = 0x1f83d9ab;
hd->h7 = 0x5be0cd19;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
static void
sha224_init (void *context, unsigned int flags)
{
SHA256_CONTEXT *hd = context;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = 0xc1059ed8;
hd->h1 = 0x367cd507;
hd->h2 = 0x3070dd17;
hd->h3 = 0xf70e5939;
hd->h4 = 0xffc00b31;
hd->h5 = 0x68581511;
hd->h6 = 0x64f98fa7;
hd->h7 = 0xbefa4fa4;
hd->bctx.nblocks = 0;
hd->bctx.nblocks_high = 0;
hd->bctx.count = 0;
hd->bctx.blocksize = 64;
- hd->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ hd->bctx.bwrite = do_transform_generic;
#ifdef USE_SSSE3
- hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
#endif
#ifdef USE_SHAEXT
- hd->use_shaext = (features & HWF_INTEL_SHAEXT)
- && (features & HWF_INTEL_SSE4_1);
+ if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+ hd->bctx.bwrite = do_sha256_transform_intel_shaext;
#endif
#ifdef USE_ARM_CE
- hd->use_arm_ce = (features & HWF_ARM_SHA2) != 0;
+ if ((features & HWF_ARM_SHA2) != 0)
+ hd->bctx.bwrite = do_sha256_transform_armv8_ce;
#endif
(void)features;
}
/*
Transform the message X which consists of 16 32-bit-words. See FIPS
180-2 for details. */
#define R(a,b,c,d,e,f,g,h,k,w) do \
{ \
t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w); \
t2 = Sum0((a)) + Maj((a),(b),(c)); \
d += t1; \
h = t1 + t2; \
} while (0)
/* (4.2) same as SHA-1's F1. */
#define Cho(x, y, z) (z ^ (x & (y ^ z)))
/* (4.3) same as SHA-1's F3 */
#define Maj(x, y, z) ((x & y) + (z & (x ^ y)))
/* (4.4) */
#define Sum0(x) (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22))
/* (4.5) */
#define Sum1(x) (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
/* Message expansion */
#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3)) /* (4.6) */
#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10)) /* (4.7) */
#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
#define W(i) ( w[i&0x0f] = S1(w[(i-2) &0x0f]) \
+ w[(i-7) &0x0f] \
+ S0(w[(i-15)&0x0f]) \
+ w[(i-16)&0x0f] )
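/* [Editor's note, not part of the patch.] Mapping to FIPS 180-2: R() computes
 * T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t] and T2 = Sigma0(a) + Maj(a,b,c),
 * updating only d += T1 and h = T1 + T2; the remaining working variables are
 * rotated by permuting the macro arguments at the call sites. Note that Maj()
 * is written as (x & y) + (z & (x ^ y)) rather than with '|'; the two terms
 * can never have a bit set in common, so '+' and '|' give the same result. */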
static unsigned int
-transform_blk (void *ctx, const unsigned char *data)
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
{
SHA256_CONTEXT *hd = ctx;
static const u32 K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
- u32 a,b,c,d,e,f,g,h,t1,t2;
- u32 w[16];
-
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- R(a, b, c, d, e, f, g, h, K[0], I(0));
- R(h, a, b, c, d, e, f, g, K[1], I(1));
- R(g, h, a, b, c, d, e, f, K[2], I(2));
- R(f, g, h, a, b, c, d, e, K[3], I(3));
- R(e, f, g, h, a, b, c, d, K[4], I(4));
- R(d, e, f, g, h, a, b, c, K[5], I(5));
- R(c, d, e, f, g, h, a, b, K[6], I(6));
- R(b, c, d, e, f, g, h, a, K[7], I(7));
- R(a, b, c, d, e, f, g, h, K[8], I(8));
- R(h, a, b, c, d, e, f, g, K[9], I(9));
- R(g, h, a, b, c, d, e, f, K[10], I(10));
- R(f, g, h, a, b, c, d, e, K[11], I(11));
- R(e, f, g, h, a, b, c, d, K[12], I(12));
- R(d, e, f, g, h, a, b, c, K[13], I(13));
- R(c, d, e, f, g, h, a, b, K[14], I(14));
- R(b, c, d, e, f, g, h, a, K[15], I(15));
-
- R(a, b, c, d, e, f, g, h, K[16], W(16));
- R(h, a, b, c, d, e, f, g, K[17], W(17));
- R(g, h, a, b, c, d, e, f, K[18], W(18));
- R(f, g, h, a, b, c, d, e, K[19], W(19));
- R(e, f, g, h, a, b, c, d, K[20], W(20));
- R(d, e, f, g, h, a, b, c, K[21], W(21));
- R(c, d, e, f, g, h, a, b, K[22], W(22));
- R(b, c, d, e, f, g, h, a, K[23], W(23));
- R(a, b, c, d, e, f, g, h, K[24], W(24));
- R(h, a, b, c, d, e, f, g, K[25], W(25));
- R(g, h, a, b, c, d, e, f, K[26], W(26));
- R(f, g, h, a, b, c, d, e, K[27], W(27));
- R(e, f, g, h, a, b, c, d, K[28], W(28));
- R(d, e, f, g, h, a, b, c, K[29], W(29));
- R(c, d, e, f, g, h, a, b, K[30], W(30));
- R(b, c, d, e, f, g, h, a, K[31], W(31));
-
- R(a, b, c, d, e, f, g, h, K[32], W(32));
- R(h, a, b, c, d, e, f, g, K[33], W(33));
- R(g, h, a, b, c, d, e, f, K[34], W(34));
- R(f, g, h, a, b, c, d, e, K[35], W(35));
- R(e, f, g, h, a, b, c, d, K[36], W(36));
- R(d, e, f, g, h, a, b, c, K[37], W(37));
- R(c, d, e, f, g, h, a, b, K[38], W(38));
- R(b, c, d, e, f, g, h, a, K[39], W(39));
- R(a, b, c, d, e, f, g, h, K[40], W(40));
- R(h, a, b, c, d, e, f, g, K[41], W(41));
- R(g, h, a, b, c, d, e, f, K[42], W(42));
- R(f, g, h, a, b, c, d, e, K[43], W(43));
- R(e, f, g, h, a, b, c, d, K[44], W(44));
- R(d, e, f, g, h, a, b, c, K[45], W(45));
- R(c, d, e, f, g, h, a, b, K[46], W(46));
- R(b, c, d, e, f, g, h, a, K[47], W(47));
-
- R(a, b, c, d, e, f, g, h, K[48], W(48));
- R(h, a, b, c, d, e, f, g, K[49], W(49));
- R(g, h, a, b, c, d, e, f, K[50], W(50));
- R(f, g, h, a, b, c, d, e, K[51], W(51));
- R(e, f, g, h, a, b, c, d, K[52], W(52));
- R(d, e, f, g, h, a, b, c, K[53], W(53));
- R(c, d, e, f, g, h, a, b, K[54], W(54));
- R(b, c, d, e, f, g, h, a, K[55], W(55));
- R(a, b, c, d, e, f, g, h, K[56], W(56));
- R(h, a, b, c, d, e, f, g, K[57], W(57));
- R(g, h, a, b, c, d, e, f, K[58], W(58));
- R(f, g, h, a, b, c, d, e, K[59], W(59));
- R(e, f, g, h, a, b, c, d, K[60], W(60));
- R(d, e, f, g, h, a, b, c, K[61], W(61));
- R(c, d, e, f, g, h, a, b, K[62], W(62));
- R(b, c, d, e, f, g, h, a, K[63], W(63));
-
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /*burn_stack*/ 26*4+32;
-}
-#undef S0
-#undef S1
-#undef R
-
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
- defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
- u32 state[8],
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_SHAEXT
-/* Does not need ASM_FUNC_ABI */
-unsigned int
-_gcry_sha256_transform_intel_shaext(u32 state[8],
- const unsigned char *input_data,
- size_t num_blks);
-#endif
-
-#ifdef USE_ARM_CE
-unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
- const void *input_data,
- size_t num_blks);
-#endif
-
-static unsigned int
-transform (void *ctx, const unsigned char *data, size_t nblks)
-{
- SHA256_CONTEXT *hd = ctx;
- unsigned int burn;
-
-#ifdef USE_SHAEXT
- if (hd->use_shaext)
- {
- burn = _gcry_sha256_transform_intel_shaext (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX2
- if (hd->use_avx2)
- {
- burn = _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_AVX
- if (hd->use_avx)
- {
- burn = _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-
-#ifdef USE_SSSE3
- if (hd->use_ssse3)
+ do
{
- burn = _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks);
- burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
- return burn;
- }
-#endif
-#ifdef USE_ARM_CE
- if (hd->use_arm_ce)
- {
- burn = _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks);
- burn += burn ? 4 * sizeof(void*) : 0;
- return burn;
- }
-#endif
+ u32 a,b,c,d,e,f,g,h,t1,t2;
+ u32 w[16];
+
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ R(a, b, c, d, e, f, g, h, K[0], I(0));
+ R(h, a, b, c, d, e, f, g, K[1], I(1));
+ R(g, h, a, b, c, d, e, f, K[2], I(2));
+ R(f, g, h, a, b, c, d, e, K[3], I(3));
+ R(e, f, g, h, a, b, c, d, K[4], I(4));
+ R(d, e, f, g, h, a, b, c, K[5], I(5));
+ R(c, d, e, f, g, h, a, b, K[6], I(6));
+ R(b, c, d, e, f, g, h, a, K[7], I(7));
+ R(a, b, c, d, e, f, g, h, K[8], I(8));
+ R(h, a, b, c, d, e, f, g, K[9], I(9));
+ R(g, h, a, b, c, d, e, f, K[10], I(10));
+ R(f, g, h, a, b, c, d, e, K[11], I(11));
+ R(e, f, g, h, a, b, c, d, K[12], I(12));
+ R(d, e, f, g, h, a, b, c, K[13], I(13));
+ R(c, d, e, f, g, h, a, b, K[14], I(14));
+ R(b, c, d, e, f, g, h, a, K[15], I(15));
+
+ R(a, b, c, d, e, f, g, h, K[16], W(16));
+ R(h, a, b, c, d, e, f, g, K[17], W(17));
+ R(g, h, a, b, c, d, e, f, K[18], W(18));
+ R(f, g, h, a, b, c, d, e, K[19], W(19));
+ R(e, f, g, h, a, b, c, d, K[20], W(20));
+ R(d, e, f, g, h, a, b, c, K[21], W(21));
+ R(c, d, e, f, g, h, a, b, K[22], W(22));
+ R(b, c, d, e, f, g, h, a, K[23], W(23));
+ R(a, b, c, d, e, f, g, h, K[24], W(24));
+ R(h, a, b, c, d, e, f, g, K[25], W(25));
+ R(g, h, a, b, c, d, e, f, K[26], W(26));
+ R(f, g, h, a, b, c, d, e, K[27], W(27));
+ R(e, f, g, h, a, b, c, d, K[28], W(28));
+ R(d, e, f, g, h, a, b, c, K[29], W(29));
+ R(c, d, e, f, g, h, a, b, K[30], W(30));
+ R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+ R(a, b, c, d, e, f, g, h, K[32], W(32));
+ R(h, a, b, c, d, e, f, g, K[33], W(33));
+ R(g, h, a, b, c, d, e, f, K[34], W(34));
+ R(f, g, h, a, b, c, d, e, K[35], W(35));
+ R(e, f, g, h, a, b, c, d, K[36], W(36));
+ R(d, e, f, g, h, a, b, c, K[37], W(37));
+ R(c, d, e, f, g, h, a, b, K[38], W(38));
+ R(b, c, d, e, f, g, h, a, K[39], W(39));
+ R(a, b, c, d, e, f, g, h, K[40], W(40));
+ R(h, a, b, c, d, e, f, g, K[41], W(41));
+ R(g, h, a, b, c, d, e, f, K[42], W(42));
+ R(f, g, h, a, b, c, d, e, K[43], W(43));
+ R(e, f, g, h, a, b, c, d, K[44], W(44));
+ R(d, e, f, g, h, a, b, c, K[45], W(45));
+ R(c, d, e, f, g, h, a, b, K[46], W(46));
+ R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+ R(a, b, c, d, e, f, g, h, K[48], W(48));
+ R(h, a, b, c, d, e, f, g, K[49], W(49));
+ R(g, h, a, b, c, d, e, f, K[50], W(50));
+ R(f, g, h, a, b, c, d, e, K[51], W(51));
+ R(e, f, g, h, a, b, c, d, K[52], W(52));
+ R(d, e, f, g, h, a, b, c, K[53], W(53));
+ R(c, d, e, f, g, h, a, b, K[54], W(54));
+ R(b, c, d, e, f, g, h, a, K[55], W(55));
+ R(a, b, c, d, e, f, g, h, K[56], W(56));
+ R(h, a, b, c, d, e, f, g, K[57], W(57));
+ R(g, h, a, b, c, d, e, f, K[58], W(58));
+ R(f, g, h, a, b, c, d, e, K[59], W(59));
+ R(e, f, g, h, a, b, c, d, K[60], W(60));
+ R(d, e, f, g, h, a, b, c, K[61], W(61));
+ R(c, d, e, f, g, h, a, b, K[62], W(62));
+ R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
- do
- {
- burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-
- return burn;
+ return 26*4 + 32 + 3 * sizeof(void*);
}
+#undef S0
+#undef S1
+#undef R
+
/*
The routine finally terminates the computation and returns the
digest. The handle is prepared for a new cycle, but adding bytes
 to the handle will destroy the returned buffer. Returns: 32
 bytes with the message digest. */
static void
sha256_final(void *context)
{
SHA256_CONTEXT *hd = context;
u32 t, th, msb, lsb;
byte *p;
unsigned int burn;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
t = hd->bctx.nblocks;
if (sizeof t == sizeof hd->bctx.nblocks)
th = hd->bctx.nblocks_high;
else
th = hd->bctx.nblocks >> 32;
/* multiply by 64 to make a byte count */
lsb = t << 6;
msb = (th << 6) | (t >> 26);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 29;
if (hd->bctx.count < 56)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 56)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 64)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (hd, NULL, 0); /* flush */;
memset (hd->bctx.buf, 0, 56 ); /* fill next block with zeroes */
}
/* append the 64 bit count */
buf_put_be32(hd->bctx.buf + 56, msb);
buf_put_be32(hd->bctx.buf + 60, lsb);
- burn = transform (hd, hd->bctx.buf, 1);
+ burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (burn);
p = hd->bctx.buf;
#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
X(0);
X(1);
X(2);
X(3);
X(4);
X(5);
X(6);
X(7);
#undef X
}
static byte *
sha256_read (void *context)
{
SHA256_CONTEXT *hd = context;
return hd->bctx.buf;
}
/* Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 32 bytes. */
void
_gcry_sha256_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha256_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 32);
}
/* Shortcut function which puts the hash value of the supplied buffer
* into outbuf which must have a size of 28 bytes. */
static void
_gcry_sha224_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA256_CONTEXT hd;
sha224_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha256_final (&hd);
memcpy (outbuf, hd.bctx.buf, 28);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha224 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abc", 3,
"\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55\xb3"
"\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7", 28);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01\x50"
"\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25", 28);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA224, 1,
NULL, 0,
"\x20\x79\x46\x55\x98\x0c\x91\xd8\xbb\xb4\xc1\xea\x97\x61\x8a\x4b"
"\xf0\x3f\x42\x58\x19\x48\xb2\xee\x4e\xe7\xad\x67", 28);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA224, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha256 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abc", 3,
"\xba\x78\x16\xbf\x8f\x01\xcf\xea\x41\x41\x40\xde\x5d\xae\x22\x23"
"\xb0\x03\x61\xa3\x96\x17\x7a\x9c\xb4\x10\xff\x61\xf2\x00\x15\xad", 32);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 0,
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
"\x24\x8d\x6a\x61\xd2\x06\x38\xb8\xe5\xc0\x26\x93\x0c\x3e\x60\x39"
"\xa3\x3c\xe4\x59\x64\xff\x21\x67\xf6\xec\xed\xd4\x19\xdb\x06\xc1",
32);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA256, 1,
NULL, 0,
"\xcd\xc7\x6e\x5c\x99\x14\xfb\x92\x81\xa1\xc7\xe2\x84\xd7\x3e\x67"
"\xf1\x80\x9a\x48\xa4\x97\x20\x0e\x04\x6d\x39\xcc\xc7\x11\x2c\xd0",
32);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA256, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA224:
ec = selftests_sha224 (extended, report);
break;
case GCRY_MD_SHA256:
ec = selftests_sha256 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
{ 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48,
0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04,
0x1C
};
static gcry_md_oid_spec_t oid_spec_sha224[] =
{
/* From RFC3874, Section 4 */
{ "2.16.840.1.101.3.4.2.4" },
{ NULL },
};
static byte asn256[19] = /* Object ID is 2.16.840.1.101.3.4.2.1 */
{ 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
0x00, 0x04, 0x20 };
static gcry_md_oid_spec_t oid_spec_sha256[] =
{
/* According to the OpenPGP draft rfc2440-bis06 */
{ "2.16.840.1.101.3.4.2.1" },
/* PKCS#1 sha256WithRSAEncryption */
{ "1.2.840.113549.1.1.11" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha224 =
{
GCRY_MD_SHA224, {0, 1},
"SHA224", asn224, DIM (asn224), oid_spec_sha224, 28,
sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha224_hash_buffer, _gcry_sha224_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
gcry_md_spec_t _gcry_digest_spec_sha256 =
{
GCRY_MD_SHA256, {0, 1},
"SHA256", asn256, DIM (asn256), oid_spec_sha256, 32,
sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
_gcry_sha256_hash_buffer, _gcry_sha256_hash_buffers,
sizeof (SHA256_CONTEXT),
run_selftests
};
diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S
index a9d12724..6596f2cd 100644
--- a/cipher/sha512-armv7-neon.S
+++ b/cipher/sha512-armv7-neon.S
@@ -1,449 +1,450 @@
/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
*
* Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
defined(HAVE_GCC_INLINE_ASM_NEON)
.text
.syntax unified
.fpu neon
.arm
/* structure of SHA512_CONTEXT */
#define hd_a 0
#define hd_b ((hd_a) + 8)
#define hd_c ((hd_b) + 8)
#define hd_d ((hd_c) + 8)
#define hd_e ((hd_d) + 8)
#define hd_f ((hd_e) + 8)
#define hd_g ((hd_f) + 8)
/* register macros */
#define RK %r2
#define RA d0
#define RB d1
#define RC d2
#define RD d3
#define RE d4
#define RF d5
#define RG d6
#define RH d7
#define RT0 d8
#define RT1 d9
#define RT2 d10
#define RT3 d11
#define RT4 d12
#define RT5 d13
#define RT6 d14
#define RT7 d15
#define RT01q q4
#define RT23q q5
#define RT45q q6
#define RT67q q7
#define RW0 d16
#define RW1 d17
#define RW2 d18
#define RW3 d19
#define RW4 d20
#define RW5 d21
#define RW6 d22
#define RW7 d23
#define RW8 d24
#define RW9 d25
#define RW10 d26
#define RW11 d27
#define RW12 d28
#define RW13 d29
#define RW14 d30
#define RW15 d31
#define RW01q q8
#define RW23q q9
#define RW45q q10
#define RW67q q11
#define RW89q q12
#define RW1011q q13
#define RW1213q q14
#define RW1415q q15
/***********************************************************************
* ARM assembly implementation of sha512 transform
***********************************************************************/
#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3; \
\
/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
\
/**** S0(w[1:2]) */ \
\
/* w[0:1] += w[9:10] */ \
/* RT23q = rw1:rw2 */ \
vext.u64 RT23q, rw01q, rw23q, #1; \
vadd.u64 rw0, rw9; \
vadd.u64 rg, rg, RT0; \
vadd.u64 rw1, rw10;\
vadd.u64 rg, rg, RT1; /* g+=t1; */ \
\
vshr.u64 RT45q, RT23q, #1; \
vshl.u64 RT67q, RT23q, #64 - 1; \
vshr.u64 RT01q, RT23q, #8; \
veor.u64 RT45q, RT45q, RT67q; \
vshl.u64 RT67q, RT23q, #64 - 8; \
veor.u64 RT45q, RT45q, RT01q; \
vshr.u64 RT01q, RT23q, #7; \
veor.u64 RT45q, RT45q, RT67q; \
\
/**** S1(w[14:15]) */ \
vshr.u64 RT23q, rw1415q, #6; \
veor.u64 RT01q, RT01q, RT45q; \
vshr.u64 RT45q, rw1415q, #19; \
vshl.u64 RT67q, rw1415q, #64 - 19; \
veor.u64 RT23q, RT23q, RT45q; \
vshr.u64 RT45q, rw1415q, #61; \
veor.u64 RT23q, RT23q, RT67q; \
vshl.u64 RT67q, rw1415q, #64 - 61; \
veor.u64 RT23q, RT23q, RT45q; \
vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
veor.u64 RT01q, RT23q, RT67q;
#define vadd_RT01q(rw01q) \
/* w[0:1] += S(w[14:15]) */ \
vadd.u64 rw01q, RT01q;
#define dummy(_) /*_*/
#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, interleave_op1, arg1, interleave_op2, arg2) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op1(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
interleave_op2(arg2); \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3;
#define vadd_rg_RT0(rg) \
vadd.u64 rg, rg, RT0;
#define vadd_rg_RT1(rg) \
vadd.u64 rg, rg, RT1; /* g+=t1; */
.align 3
.globl _gcry_sha512_transform_armv7_neon
.type _gcry_sha512_transform_armv7_neon,%function;
_gcry_sha512_transform_armv7_neon:
/* Input:
* %r0: SHA512_CONTEXT
* %r1: data
* %r2: u64 k[] constants
* %r3: nblks
*/
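/* Editorial note (not part of the original patch): the function now returns
 * an unsigned int burn-stack value in %r0; it keeps no sensitive data on the
 * stack, so %r0 is simply cleared by the "eor %r0, %r0" added before
 * "pop {%pc}" below. */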
push {%lr};
mov %lr, #0;
/* Load context to d0-d7 */
vld1.64 {RA-RD}, [%r0]!;
vld1.64 {RE-RH}, [%r0];
sub %r0, #(4*8);
/* Load input to w[16], d16-d31 */
/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
vld1.64 {RW0-RW3}, [%r1]!;
vld1.64 {RW4-RW7}, [%r1]!;
vld1.64 {RW8-RW11}, [%r1]!;
vld1.64 {RW12-RW15}, [%r1]!;
#ifdef __ARMEL__
/* byteswap */
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
/* EABI says that d8-d15 must be preserved by callee. */
vpush {RT0-RT7};
.Loop:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, dummy, _);
b .Lenter_rounds;
.Loop_rounds:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
.Lenter_rounds:
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
add %lr, #16;
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
cmp %lr, #64;
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
bne .Loop_rounds;
subs %r3, #1;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
beq .Lhandle_tail;
vld1.64 {RW0-RW3}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
#endif
vld1.64 {RW4-RW7}, [%r1]!;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
#ifdef __ARMEL__
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
#endif
vld1.64 {RW8-RW11}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
#endif
vld1.64 {RW12-RW15}, [%r1]!;
vadd_rg_RT0(RA);
vadd_rg_RT1(RA);
/* Load context */
vld1.64 {RT0-RT3}, [%r0]!;
vld1.64 {RT4-RT7}, [%r0];
sub %r0, #(4*8);
#ifdef __ARMEL__
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
vadd.u64 RA, RT0;
vadd.u64 RB, RT1;
vadd.u64 RC, RT2;
vadd.u64 RD, RT3;
vadd.u64 RE, RT4;
vadd.u64 RF, RT5;
vadd.u64 RG, RT6;
vadd.u64 RH, RT7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
sub RK, $(8*80);
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
mov %lr, #0;
sub %r0, #(4*8);
b .Loop;
.ltorg
.Lhandle_tail:
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG);
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC);
/* Load context to d16-d23 */
vld1.64 {RW0-RW3}, [%r0]!;
vadd_rg_RT0(RA);
vld1.64 {RW4-RW7}, [%r0];
vadd_rg_RT1(RA);
sub %r0, #(4*8);
vadd.u64 RA, RW0;
vadd.u64 RB, RW1;
vadd.u64 RC, RW2;
vadd.u64 RD, RW3;
vadd.u64 RE, RW4;
vadd.u64 RF, RW5;
vadd.u64 RG, RW6;
vadd.u64 RH, RW7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
/* Clear used registers */
/* d16-d31 */
veor.u64 RW01q, RW01q;
veor.u64 RW23q, RW23q;
veor.u64 RW45q, RW45q;
veor.u64 RW67q, RW67q;
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
veor.u64 RW89q, RW89q;
veor.u64 RW1011q, RW1011q;
veor.u64 RW1213q, RW1213q;
veor.u64 RW1415q, RW1415q;
/* d8-d15 */
vpop {RT0-RT7};
/* d0-d7 (q0-q3) */
veor.u64 %q0, %q0;
veor.u64 %q1, %q1;
veor.u64 %q2, %q2;
veor.u64 %q3, %q3;
+ eor %r0, %r0;
pop {%pc};
.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon;
#endif
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 9405de80..721f3405 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -1,991 +1,951 @@
/* sha512.c - SHA384 and SHA512 hash functions
* Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Test vectors from FIPS-180-2:
*
* "abc"
* 384:
* CB00753F 45A35E8B B5A03D69 9AC65007 272C32AB 0EDED163
* 1A8B605A 43FF5BED 8086072B A1E7CC23 58BAECA1 34C825A7
* 512:
* DDAF35A1 93617ABA CC417349 AE204131 12E6FA4E 89A97EA2 0A9EEEE6 4B55D39A
* 2192992A 274FC1A8 36BA3C23 A3FEEBBD 454D4423 643CE80E 2A9AC94F A54CA49F
*
* "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
* 384:
* 09330C33 F71147E8 3D192FC7 82CD1B47 53111B17 3B3B05D2
* 2FA08086 E3B0F712 FCC7C71A 557E2DB9 66C3E9FA 91746039
* 512:
* 8E959B75 DAE313DA 8CF4F728 14FC143F 8F7779C6 EB9F7FA1 7299AEAD B6889018
* 501D289E 4900F7E4 331B99DE C4B5433A C7D329EE B6DD2654 5E96E55B 874BE909
*
* "a" x 1000000
* 384:
* 9D0E1809 716474CB 086E834E 310A4A1C ED149E9C 00F24852
* 7972CEC5 704C2A5B 07B8B3DC 38ECC4EB AE97DDD8 7F3D8985
* 512:
* E718483D 0CE76964 4E2E42C7 BC15B463 8E1F98B1 3B204428 5632A803 AFA973EB
* DE0FF244 877EA60A 4CB0432C E577C31B EB009C5C 2C49AA2E 4EADB217 AD8CC09B
*/
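/* Illustrative check (editorial sketch, not part of the original file): the
 * "abc" SHA-512 vector above can be reproduced through the public libgcrypt
 * API, for example:
 *
 *   #include <stdio.h>
 *   #include <gcrypt.h>
 *
 *   int main (void)
 *   {
 *     unsigned char digest[64];
 *     int i;
 *
 *     (void) gcry_check_version (NULL);
 *     gcry_md_hash_buffer (GCRY_MD_SHA512, digest, "abc", 3);
 *     for (i = 0; i < 64; i++)
 *       printf ("%02X", digest[i]);
 *     printf ("\n");
 *     return 0;
 *   }
 *
 * The printed value should match the 512-bit "abc" digest listed above
 * (DDAF35A1 ... A54CA49F). */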
#include <config.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
#undef USE_ARM_NEON_ASM
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
&& defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_ARM_NEON_ASM 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/
/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
#undef USE_ARM_ASM
#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
# define USE_ARM_ASM 1
#endif
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
#undef USE_AVX2
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX2 1
#endif
typedef struct
{
u64 h0, h1, h2, h3, h4, h5, h6, h7;
} SHA512_STATE;
typedef struct
{
gcry_md_block_ctx_t bctx;
SHA512_STATE state;
+} SHA512_CONTEXT;
+
+
+static const u64 k[] =
+ {
+ U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+ U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+ U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+ U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+ U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+ U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+ U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+ U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+ U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+ U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+ U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+ U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+ U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+ U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+ U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+ U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+ U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+ U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+ U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+ U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+ U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+ U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+ U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+ U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+ U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+ U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+ U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+ U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+ U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+ U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+ U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+ U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+ U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+ U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+ U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+ U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+ U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+ U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+ U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+ U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+ };
+
+
+/* AMD64 assembly implementations use the SystemV ABI; on Win64 an ABI
+ * conversion and additional stack space to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
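+/* Editorial note (not part of the original patch): on Win64 the extra stack
+ * accounts for saving the ten registers XMM6..XMM15 (10 * 16 bytes) plus
+ * four pointer-sized slots used by the ABI-conversion call frame. */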
+
+
#ifdef USE_ARM_NEON_ASM
- unsigned int use_neon:1;
+unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+
+static unsigned int
+do_sha512_transform_armv7_neon(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
#endif
+
#ifdef USE_SSSE3
- unsigned int use_ssse3:1;
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_ssse3 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX
- unsigned int use_avx:1;
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
+
#ifdef USE_AVX2
- unsigned int use_avx2:1;
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+ void *state,
+ size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_amd64_avx2 (data, &hd->state, nblks)
+ + ASM_EXTRA_STACK;
+}
#endif
-} SHA512_CONTEXT;
+
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks);
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
+{
+ SHA512_CONTEXT *hd = context;
+ return _gcry_sha512_transform_arm (&hd->state, data, k, nblks);
+}
+#else
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks);
+#endif
+
static void
sha512_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
+ (void)k;
hd->h0 = U64_C(0x6a09e667f3bcc908);
hd->h1 = U64_C(0xbb67ae8584caa73b);
hd->h2 = U64_C(0x3c6ef372fe94f82b);
hd->h3 = U64_C(0xa54ff53a5f1d36f1);
hd->h4 = U64_C(0x510e527fade682d1);
hd->h5 = U64_C(0x9b05688c2b3e6c1f);
hd->h6 = U64_C(0x1f83d9abfb41bd6b);
hd->h7 = U64_C(0x5be0cd19137e2179);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
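/* Editorial note (not in the original patch): whichever implementation was
 * selected above is later invoked through the generic block machinery, e.g.
 * as "(*hd->bctx.bwrite) (hd, hd->bctx.buf, 1)" in sha512_final below. */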
static void
sha384_init (void *context, unsigned int flags)
{
SHA512_CONTEXT *ctx = context;
SHA512_STATE *hd = &ctx->state;
unsigned int features = _gcry_get_hw_features ();
(void)flags;
hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
hd->h1 = U64_C(0x629a292a367cd507);
hd->h2 = U64_C(0x9159015a3070dd17);
hd->h3 = U64_C(0x152fecd8f70e5939);
hd->h4 = U64_C(0x67332667ffc00b31);
hd->h5 = U64_C(0x8eb44a8768581511);
hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
hd->h7 = U64_C(0x47b5481dbefa4fa4);
ctx->bctx.nblocks = 0;
ctx->bctx.nblocks_high = 0;
ctx->bctx.count = 0;
ctx->bctx.blocksize = 128;
- ctx->bctx.bwrite = transform;
+ /* Order of feature checks is important here; last match will be
+ * selected. Keep slower implementations at the top and faster at
+ * the bottom. */
+ ctx->bctx.bwrite = do_transform_generic;
#ifdef USE_ARM_NEON_ASM
- ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+ if ((features & HWF_ARM_NEON) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
#endif
#ifdef USE_SSSE3
- ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+ if ((features & HWF_INTEL_SSSE3) != 0)
+ ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
+ if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
#endif
#ifdef USE_AVX2
- ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
+ if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+ ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
#endif
-
(void)features;
}
-static const u64 k[] =
- {
- U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
- U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
- U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
- U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
- U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
- U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
- U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
- U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
- U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
- U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
- U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
- U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
- U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
- U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
- U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
- U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
- U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
- U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
- U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
- U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
- U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
- U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
- U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
- U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
- U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
- U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
- U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
- U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
- U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
- U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
- U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
- U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
- U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
- U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
- U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
- U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
- U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
- U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
- U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
- U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
- };
-
#ifndef USE_ARM_ASM
static inline u64
ROTR (u64 x, u64 n)
{
return ((x >> n) | (x << (64 - n)));
}
static inline u64
Ch (u64 x, u64 y, u64 z)
{
return ((x & y) ^ ( ~x & z));
}
static inline u64
Maj (u64 x, u64 y, u64 z)
{
return ((x & y) ^ (x & z) ^ (y & z));
}
static inline u64
Sum0 (u64 x)
{
return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
}
static inline u64
Sum1 (u64 x)
{
return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
}
/****************
* Transform the message W which consists of 16 64-bit-words
*/
static unsigned int
-transform_blk (SHA512_STATE *hd, const unsigned char *data)
-{
- u64 a, b, c, d, e, f, g, h;
- u64 w[16];
- int t;
-
- /* get values from the chaining vars */
- a = hd->h0;
- b = hd->h1;
- c = hd->h2;
- d = hd->h3;
- e = hd->h4;
- f = hd->h5;
- g = hd->h6;
- h = hd->h7;
-
- for ( t = 0; t < 16; t++ )
- w[t] = buf_get_be64(data + t * 8);
-
-#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
-#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
-
- for (t = 0; t < 80 - 16; )
- {
- u64 t1, t2;
-
- /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
- with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
- initialized to 0,1,2,3...255,0,... and 1000 iterations:
-
- Not unrolled with macros: 440ms
- Unrolled with macros: 350ms
- Unrolled with inline: 330ms
- */
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]);
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- for (; t < 80; )
- {
- u64 t1, t2;
-
-#if 0 /* Not unrolled. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16];
- t2 = Sum0 (a) + Maj (a, b, c);
- h = g;
- g = f;
- f = e;
- e = d + t1;
- d = c;
- c = b;
- b = a;
- a = t1 + t2;
- t++;
-#else /* Unrolled to interweave the chain variables. */
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
- t2 = Sum0 (a) + Maj (a, b, c);
- d += t1;
- h = t1 + t2;
-
- t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
- t2 = Sum0 (h) + Maj (h, a, b);
- c += t1;
- g = t1 + t2;
-
- t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
- t2 = Sum0 (g) + Maj (g, h, a);
- b += t1;
- f = t1 + t2;
-
- t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
- t2 = Sum0 (f) + Maj (f, g, h);
- a += t1;
- e = t1 + t2;
-
- t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
- t2 = Sum0 (e) + Maj (e, f, g);
- h += t1;
- d = t1 + t2;
-
- t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
- t2 = Sum0 (d) + Maj (d, e, f);
- g += t1;
- c = t1 + t2;
-
- t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
- t2 = Sum0 (c) + Maj (c, d, e);
- f += t1;
- b = t1 + t2;
-
- t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
- t2 = Sum0 (b) + Maj (b, c, d);
- e += t1;
- a = t1 + t2;
-
- t += 16;
-#endif
- }
-
- /* Update chaining vars. */
- hd->h0 += a;
- hd->h1 += b;
- hd->h2 += c;
- hd->h3 += d;
- hd->h4 += e;
- hd->h5 += f;
- hd->h6 += g;
- hd->h7 += h;
-
- return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) +
- 3 * sizeof(void*);
-}
-#endif /*!USE_ARM_ASM*/
-
-/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ASM_FUNC_ABI __attribute__((sysv_abi))
-# define ASM_EXTRA_STACK (10 * 16)
-# else
-# define ASM_FUNC_ABI
-# define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
-#ifdef USE_ARM_NEON_ASM
-void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_ARM_ASM
-unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
- const unsigned char *data,
- const u64 k[], size_t num_blks);
-#endif
-
-#ifdef USE_SSSE3
-unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX
-unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-#ifdef USE_AVX2
-unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
- void *state,
- size_t num_blks) ASM_FUNC_ABI;
-#endif
-
-
-static unsigned int
-transform (void *context, const unsigned char *data, size_t nblks)
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
{
SHA512_CONTEXT *ctx = context;
- unsigned int burn;
-
-#ifdef USE_AVX2
- if (ctx->use_avx2)
- return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_AVX
- if (ctx->use_avx)
- return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
-
-#ifdef USE_SSSE3
- if (ctx->use_ssse3)
- return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
-#endif
+ SHA512_STATE *hd = &ctx->state;
-#ifdef USE_ARM_NEON_ASM
- if (ctx->use_neon)
+ do
{
- _gcry_sha512_transform_armv7_neon (&ctx->state, data, k, nblks);
+ u64 a, b, c, d, e, f, g, h;
+ u64 w[16];
+ int t;
+
+ /* get values from the chaining vars */
+ a = hd->h0;
+ b = hd->h1;
+ c = hd->h2;
+ d = hd->h3;
+ e = hd->h4;
+ f = hd->h5;
+ g = hd->h6;
+ h = hd->h7;
+
+ for ( t = 0; t < 16; t++ )
+ w[t] = buf_get_be64(data + t * 8);
- /* _gcry_sha512_transform_armv7_neon does not store sensitive data
- * to stack. */
- return /* no burn_stack */ 0;
- }
-#endif
+#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+ for (t = 0; t < 80 - 16; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ for (; t < 80; )
+ {
+ u64 t1, t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+ t2 = Sum0 (a) + Maj (a, b, c);
+ d += t1;
+ h = t1 + t2;
+
+ t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+ t2 = Sum0 (h) + Maj (h, a, b);
+ c += t1;
+ g = t1 + t2;
+
+ t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+ t2 = Sum0 (g) + Maj (g, h, a);
+ b += t1;
+ f = t1 + t2;
+
+ t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+ t2 = Sum0 (f) + Maj (f, g, h);
+ a += t1;
+ e = t1 + t2;
+
+ t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+ t2 = Sum0 (e) + Maj (e, f, g);
+ h += t1;
+ d = t1 + t2;
+
+ t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+ t2 = Sum0 (d) + Maj (d, e, f);
+ g += t1;
+ c = t1 + t2;
+
+ t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+ t2 = Sum0 (c) + Maj (c, d, e);
+ f += t1;
+ b = t1 + t2;
+
+ t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+ t2 = Sum0 (b) + Maj (b, c, d);
+ e += t1;
+ a = t1 + t2;
+
+ t += 16;
+ }
+
+ /* Update chaining vars. */
+ hd->h0 += a;
+ hd->h1 += b;
+ hd->h2 += c;
+ hd->h3 += d;
+ hd->h4 += e;
+ hd->h5 += f;
+ hd->h6 += g;
+ hd->h7 += h;
-#ifdef USE_ARM_ASM
- burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
-#else
- do
- {
- burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
data += 128;
}
while (--nblks);
-#ifdef ASM_EXTRA_STACK
- /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
- * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
- * here too.
- */
- burn += ASM_EXTRA_STACK;
-#endif
-#endif
-
- return burn;
+ return (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
}
+#endif /*!USE_ARM_ASM*/
/* The routine final terminates the computation and
* returns the digest.
* The handle is prepared for a new cycle, but adding bytes to the
* handle will destroy the returned buffer.
* Returns: 64 bytes representing the digest. When used for sha384,
* we take the leftmost 48 of those bytes.
*/
static void
sha512_final (void *context)
{
SHA512_CONTEXT *hd = context;
unsigned int stack_burn_depth;
u64 t, th, msb, lsb;
byte *p;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
t = hd->bctx.nblocks;
/* if (sizeof t == sizeof hd->bctx.nblocks) */
th = hd->bctx.nblocks_high;
/* else */
/* th = hd->bctx.nblocks >> 64; In case we ever use u128 */
/* multiply by 128 to make a byte count */
lsb = t << 7;
msb = (th << 7) | (t >> 57);
/* add the count */
t = lsb;
if ((lsb += hd->bctx.count) < t)
msb++;
/* multiply by 8 to make a bit count */
t = lsb;
lsb <<= 3;
msb <<= 3;
msb |= t >> 61;
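/* Editorial example (not in the original source): for the 3-byte message
 * "abc", nblocks == 0 and count == 3, so lsb ends up as 3 bytes and, after
 * the shift by 3 above, as 24 bits with msb == 0; these two words form the
 * 128-bit big-endian bit count stored at buf+112 and buf+120 below. */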
if (hd->bctx.count < 112)
{ /* enough room */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
while (hd->bctx.count < 112)
hd->bctx.buf[hd->bctx.count++] = 0; /* pad */
}
else
{ /* need one extra block */
hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
while (hd->bctx.count < 128)
hd->bctx.buf[hd->bctx.count++] = 0;
_gcry_md_block_write (context, NULL, 0); /* flush */ ;
memset (hd->bctx.buf, 0, 112); /* fill next block with zeroes */
}
/* append the 128 bit count */
buf_put_be64(hd->bctx.buf + 112, msb);
buf_put_be64(hd->bctx.buf + 120, lsb);
- stack_burn_depth = transform (hd, hd->bctx.buf, 1);
+ stack_burn_depth = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
_gcry_burn_stack (stack_burn_depth);
p = hd->bctx.buf;
#define X(a) do { buf_put_be64(p, hd->state.h##a); p += 8; } while (0)
X (0);
X (1);
X (2);
X (3);
X (4);
X (5);
/* Note that these last two chunks are included even for SHA384.
We just ignore them. */
X (6);
X (7);
#undef X
}
static byte *
sha512_read (void *context)
{
SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context;
return hd->bctx.buf;
}
/* Shortcut function which puts the hash value of the supplied buffer
 * into outbuf, which must have a size of 64 bytes. */
void
_gcry_sha512_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
/* Variant of the above shortcut function using multiple buffers. */
void
_gcry_sha512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha512_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 64);
}
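/* Illustrative usage (editorial sketch, not part of the original file):
 * hashing two fragments with the multi-buffer variant above:
 *
 *   gcry_buffer_t iov[2] = { { .data = "abc", .len = 3 },
 *                            { .data = "def", .len = 3 } };
 *   unsigned char digest[64];
 *   _gcry_sha512_hash_buffers (digest, iov, 2);
 *
 * which yields the same digest as hashing the 6-byte string "abcdef". */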
/* Shortcut function which puts the hash value of the supplied buffer
 * into outbuf, which must have a size of 48 bytes. */
static void
_gcry_sha384_hash_buffer (void *outbuf, const void *buffer, size_t length)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
_gcry_md_block_write (&hd, buffer, length);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/* Variant of the above shortcut function using multiple buffers. */
static void
_gcry_sha384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt)
{
SHA512_CONTEXT hd;
sha384_init (&hd, 0);
for (;iovcnt > 0; iov++, iovcnt--)
_gcry_md_block_write (&hd,
(const char*)iov[0].data + iov[0].off, iov[0].len);
sha512_final (&hd);
memcpy (outbuf, hd.bctx.buf, 48);
}
/*
Self-test section.
*/
static gpg_err_code_t
selftests_sha384 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abc", 3,
"\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50\x07"
"\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff\x5b\xed"
"\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34\xc8\x25\xa7", 48);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x09\x33\x0C\x33\xF7\x11\x47\xE8\x3D\x19\x2F\xC7\x82\xCD\x1B\x47"
"\x53\x11\x1B\x17\x3B\x3B\x05\xD2\x2F\xA0\x80\x86\xE3\xB0\xF7\x12"
"\xFC\xC7\xC7\x1A\x55\x7E\x2D\xB9\x66\xC3\xE9\xFA\x91\x74\x60\x39",
48);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA384, 1,
NULL, 0,
"\x9D\x0E\x18\x09\x71\x64\x74\xCB\x08\x6E\x83\x4E\x31\x0A\x4A\x1C"
"\xED\x14\x9E\x9C\x00\xF2\x48\x52\x79\x72\xCE\xC5\x70\x4C\x2A\x5B"
"\x07\xB8\xB3\xDC\x38\xEC\xC4\xEB\xAE\x97\xDD\xD8\x7F\x3D\x89\x85",
48);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA384, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
static gpg_err_code_t
selftests_sha512 (int extended, selftest_report_func_t report)
{
const char *what;
const char *errtxt;
what = "short string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abc", 3,
"\xDD\xAF\x35\xA1\x93\x61\x7A\xBA\xCC\x41\x73\x49\xAE\x20\x41\x31"
"\x12\xE6\xFA\x4E\x89\xA9\x7E\xA2\x0A\x9E\xEE\xE6\x4B\x55\xD3\x9A"
"\x21\x92\x99\x2A\x27\x4F\xC1\xA8\x36\xBA\x3C\x23\xA3\xFE\xEB\xBD"
"\x45\x4D\x44\x23\x64\x3C\xE8\x0E\x2A\x9A\xC9\x4F\xA5\x4C\xA4\x9F", 64);
if (errtxt)
goto failed;
if (extended)
{
what = "long string";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 0,
"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
"\x8E\x95\x9B\x75\xDA\xE3\x13\xDA\x8C\xF4\xF7\x28\x14\xFC\x14\x3F"
"\x8F\x77\x79\xC6\xEB\x9F\x7F\xA1\x72\x99\xAE\xAD\xB6\x88\x90\x18"
"\x50\x1D\x28\x9E\x49\x00\xF7\xE4\x33\x1B\x99\xDE\xC4\xB5\x43\x3A"
"\xC7\xD3\x29\xEE\xB6\xDD\x26\x54\x5E\x96\xE5\x5B\x87\x4B\xE9\x09",
64);
if (errtxt)
goto failed;
what = "one million \"a\"";
errtxt = _gcry_hash_selftest_check_one
(GCRY_MD_SHA512, 1,
NULL, 0,
"\xE7\x18\x48\x3D\x0C\xE7\x69\x64\x4E\x2E\x42\xC7\xBC\x15\xB4\x63"
"\x8E\x1F\x98\xB1\x3B\x20\x44\x28\x56\x32\xA8\x03\xAF\xA9\x73\xEB"
"\xDE\x0F\xF2\x44\x87\x7E\xA6\x0A\x4C\xB0\x43\x2C\xE5\x77\xC3\x1B"
"\xEB\x00\x9C\x5C\x2C\x49\xAA\x2E\x4E\xAD\xB2\x17\xAD\x8C\xC0\x9B",
64);
if (errtxt)
goto failed;
}
return 0; /* Succeeded. */
failed:
if (report)
report ("digest", GCRY_MD_SHA512, what, errtxt);
return GPG_ERR_SELFTEST_FAILED;
}
/* Run a full self-test for ALGO and return 0 on success. */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
gpg_err_code_t ec;
switch (algo)
{
case GCRY_MD_SHA384:
ec = selftests_sha384 (extended, report);
break;
case GCRY_MD_SHA512:
ec = selftests_sha512 (extended, report);
break;
default:
ec = GPG_ERR_DIGEST_ALGO;
break;
}
return ec;
}
static byte sha512_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.3 */
{
0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05,
0x00, 0x04, 0x40
};
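/* Editorial note (not in the original source): the bytes above are the
 * DER-encoded DigestInfo prefix used for PKCS#1 signatures: SEQUENCE (0x30,
 * length 0x51), AlgorithmIdentifier containing the OID 2.16.840.1.101.3.4.2.3
 * with NULL parameters (0x05 0x00), followed by an OCTET STRING header
 * (0x04 0x40) announcing the 64-byte digest that gets appended. */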
static gcry_md_oid_spec_t oid_spec_sha512[] =
{
{ "2.16.840.1.101.3.4.2.3" },
/* PKCS#1 sha512WithRSAEncryption */
{ "1.2.840.113549.1.1.13" },
{ NULL }
};
gcry_md_spec_t _gcry_digest_spec_sha512 =
{
GCRY_MD_SHA512, {0, 1},
"SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64,
sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha512_hash_buffer, _gcry_sha512_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};
static byte sha384_asn[] = /* Object ID is 2.16.840.1.101.3.4.2.2 */
{
0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05,
0x00, 0x04, 0x30
};
static gcry_md_oid_spec_t oid_spec_sha384[] =
{
{ "2.16.840.1.101.3.4.2.2" },
/* PKCS#1 sha384WithRSAEncryption */
{ "1.2.840.113549.1.1.12" },
/* SHA384WithECDSA: RFC 7427 (A.3.3.) */
{ "1.2.840.10045.4.3.3" },
{ NULL },
};
gcry_md_spec_t _gcry_digest_spec_sha384 =
{
GCRY_MD_SHA384, {0, 1},
"SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48,
sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
_gcry_sha384_hash_buffer, _gcry_sha384_hash_buffers,
sizeof (SHA512_CONTEXT),
run_selftests
};