Changeset View
Changeset View
Standalone View
Standalone View
cipher/sha256.c
Show All 39 Lines | |||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "g10lib.h" | #include "g10lib.h" | ||||
#include "bithelp.h" | #include "bithelp.h" | ||||
#include "bufhelp.h" | #include "bufhelp.h" | ||||
#include "cipher.h" | #include "cipher.h" | ||||
#include "hash-common.h" | #include "hash-common.h" | ||||
#include "sha2-common.h" | |||||
/* Helper macro to force alignment to 16 bytes. */ | |||||
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ | #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED | ||||
#undef USE_SSSE3 | # define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) | ||||
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ | #else | ||||
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ | # define ATTR_ALIGNED_16 | ||||
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ | |||||
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) | |||||
# define USE_SSSE3 1 | |||||
#endif | |||||
/* USE_AVX indicates whether to compile with Intel AVX code. */ | |||||
#undef USE_AVX | |||||
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ | |||||
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ | |||||
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ | |||||
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) | |||||
# define USE_AVX 1 | |||||
#endif | |||||
/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */ | |||||
#undef USE_AVX2 | |||||
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ | |||||
defined(HAVE_GCC_INLINE_ASM_BMI2) && \ | |||||
defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ | |||||
(defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ | |||||
defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) | |||||
# define USE_AVX2 1 | |||||
#endif | |||||
/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */ | |||||
#undef USE_SHAEXT | |||||
#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \ | |||||
defined(HAVE_GCC_INLINE_ASM_SSE41) && \ | |||||
defined(ENABLE_SHAEXT_SUPPORT) | |||||
# define USE_SHAEXT 1 | |||||
#endif | |||||
/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly | |||||
* code. */ | |||||
#undef USE_ARM_CE | |||||
#ifdef ENABLE_ARM_CRYPTO_SUPPORT | |||||
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ | |||||
&& defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ | |||||
&& defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) | |||||
# define USE_ARM_CE 1 | |||||
# elif defined(__AARCH64EL__) \ | |||||
&& defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ | |||||
&& defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) | |||||
# define USE_ARM_CE 1 | |||||
# endif | |||||
#endif | #endif | ||||
typedef struct { | typedef struct { | ||||
gcry_md_block_ctx_t bctx; | gcry_md_block_ctx_t bctx; | ||||
u32 h0,h1,h2,h3,h4,h5,h6,h7; | u32 h0 ATTR_ALIGNED_16; | ||||
u32 h1,h2,h3,h4,h5,h6,h7; | |||||
} SHA256_CONTEXT; | } SHA256_CONTEXT; | ||||
/* Assembly implementations use SystemV ABI, ABI conversion and additional | |||||
* stack to store XMM6-XMM15 needed on Win64. */ | |||||
#undef ASM_FUNC_ABI | |||||
#undef ASM_EXTRA_STACK | |||||
#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \ | |||||
defined(USE_SHAEXT) | |||||
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS | |||||
# define ASM_FUNC_ABI __attribute__((sysv_abi)) | |||||
# define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4) | |||||
# else | |||||
# define ASM_FUNC_ABI | |||||
# define ASM_EXTRA_STACK 0 | |||||
# endif | |||||
#endif | |||||
#ifdef USE_SSSE3 | #ifdef USE_SSSE3 | ||||
unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data, | unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data, | ||||
u32 state[8], | u32 state[8], | ||||
size_t num_blks) ASM_FUNC_ABI; | size_t num_blks) ASM_FUNC_ABI; | ||||
static unsigned int | static unsigned int | ||||
do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data, | do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data, | ||||
size_t nblks) | size_t nblks) | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | |||||
do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data, | do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data, | ||||
size_t nblks) | size_t nblks) | ||||
{ | { | ||||
SHA256_CONTEXT *hd = ctx; | SHA256_CONTEXT *hd = ctx; | ||||
return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks); | return _gcry_sha256_transform_armv8_ce (&hd->h0, data, nblks); | ||||
} | } | ||||
#endif | #endif | ||||
#ifdef USE_PPC_ASM | |||||
void sha256_block_p8 (u32 state[8], | |||||
const unsigned char *data, | |||||
size_t len); | |||||
static unsigned int | |||||
do_sha256_transform_ppc8 (void *ctx, const unsigned char *data, | |||||
size_t nblks) | |||||
{ | |||||
SHA256_CONTEXT *hd = ctx; | |||||
sha256_block_p8 (&hd->h0, data, nblks); | |||||
return 128; /* uses 128 bytes of stack space */ | |||||
} | |||||
#endif | |||||
static unsigned int | static unsigned int | ||||
do_transform_generic (void *ctx, const unsigned char *data, size_t nblks); | do_transform_generic (void *ctx, const unsigned char *data, size_t nblks); | ||||
static void | static void | ||||
sha256_init (void *context, unsigned int flags) | sha256_init_common (void *context, unsigned int flags) | ||||
{ | { | ||||
SHA256_CONTEXT *hd = context; | SHA256_CONTEXT *hd = context; | ||||
unsigned int features = _gcry_get_hw_features (); | unsigned int features = _gcry_get_hw_features (); | ||||
(void)flags; | (void)flags; | ||||
hd->h0 = 0x6a09e667; | |||||
hd->h1 = 0xbb67ae85; | |||||
hd->h2 = 0x3c6ef372; | |||||
hd->h3 = 0xa54ff53a; | |||||
hd->h4 = 0x510e527f; | |||||
hd->h5 = 0x9b05688c; | |||||
hd->h6 = 0x1f83d9ab; | |||||
hd->h7 = 0x5be0cd19; | |||||
hd->bctx.nblocks = 0; | |||||
hd->bctx.nblocks_high = 0; | |||||
hd->bctx.count = 0; | |||||
hd->bctx.blocksize = 64; | |||||
/* Order of feature checks is important here; last match will be | /* Order of feature checks is important here; last match will be | ||||
* selected. Keep slower implementations at the top and faster at | * selected. Keep slower implementations at the top and faster at | ||||
* the bottom. */ | * the bottom. */ | ||||
hd->bctx.bwrite = do_transform_generic; | hd->bctx.bwrite = do_transform_generic; | ||||
#ifdef USE_SSSE3 | #ifdef USE_SSSE3 | ||||
if ((features & HWF_INTEL_SSSE3) != 0) | if ((features & HWF_INTEL_SSSE3) != 0) | ||||
hd->bctx.bwrite = do_sha256_transform_amd64_ssse3; | hd->bctx.bwrite = do_sha256_transform_amd64_ssse3; | ||||
#endif | #endif | ||||
Show All 10 Lines | |||||
#ifdef USE_SHAEXT | #ifdef USE_SHAEXT | ||||
if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1)) | if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1)) | ||||
hd->bctx.bwrite = do_sha256_transform_intel_shaext; | hd->bctx.bwrite = do_sha256_transform_intel_shaext; | ||||
#endif | #endif | ||||
#ifdef USE_ARM_CE | #ifdef USE_ARM_CE | ||||
if ((features & HWF_ARM_SHA2) != 0) | if ((features & HWF_ARM_SHA2) != 0) | ||||
hd->bctx.bwrite = do_sha256_transform_armv8_ce; | hd->bctx.bwrite = do_sha256_transform_armv8_ce; | ||||
#endif | #endif | ||||
#ifdef USE_PPC_ASM | |||||
if ((features & HWF_PPC_VCRYPTO) != 0) | |||||
hd->bctx.bwrite = do_sha256_transform_ppc8; | |||||
#endif | |||||
(void)features; | (void)features; | ||||
} | } | ||||
static void | |||||
sha256_init (void *context, unsigned int flags) | |||||
{ | |||||
SHA256_CONTEXT *hd = context; | |||||
(void)flags; | |||||
hd->h0 = 0x6a09e667; | |||||
hd->h1 = 0xbb67ae85; | |||||
hd->h2 = 0x3c6ef372; | |||||
hd->h3 = 0xa54ff53a; | |||||
hd->h4 = 0x510e527f; | |||||
hd->h5 = 0x9b05688c; | |||||
hd->h6 = 0x1f83d9ab; | |||||
hd->h7 = 0x5be0cd19; | |||||
hd->bctx.nblocks = 0; | |||||
hd->bctx.nblocks_high = 0; | |||||
hd->bctx.count = 0; | |||||
hd->bctx.blocksize = 64; | |||||
sha256_init_common (context, flags); | |||||
} | |||||
static void | static void | ||||
sha224_init (void *context, unsigned int flags) | sha224_init (void *context, unsigned int flags) | ||||
{ | { | ||||
SHA256_CONTEXT *hd = context; | SHA256_CONTEXT *hd = context; | ||||
unsigned int features = _gcry_get_hw_features (); | |||||
(void)flags; | (void)flags; | ||||
hd->h0 = 0xc1059ed8; | hd->h0 = 0xc1059ed8; | ||||
hd->h1 = 0x367cd507; | hd->h1 = 0x367cd507; | ||||
hd->h2 = 0x3070dd17; | hd->h2 = 0x3070dd17; | ||||
hd->h3 = 0xf70e5939; | hd->h3 = 0xf70e5939; | ||||
hd->h4 = 0xffc00b31; | hd->h4 = 0xffc00b31; | ||||
hd->h5 = 0x68581511; | hd->h5 = 0x68581511; | ||||
hd->h6 = 0x64f98fa7; | hd->h6 = 0x64f98fa7; | ||||
hd->h7 = 0xbefa4fa4; | hd->h7 = 0xbefa4fa4; | ||||
hd->bctx.nblocks = 0; | hd->bctx.nblocks = 0; | ||||
hd->bctx.nblocks_high = 0; | hd->bctx.nblocks_high = 0; | ||||
hd->bctx.count = 0; | hd->bctx.count = 0; | ||||
hd->bctx.blocksize = 64; | hd->bctx.blocksize = 64; | ||||
/* Order of feature checks is important here; last match will be | sha256_init_common (context, flags); | ||||
* selected. Keep slower implementations at the top and faster at | |||||
* the bottom. */ | |||||
hd->bctx.bwrite = do_transform_generic; | |||||
#ifdef USE_SSSE3 | |||||
if ((features & HWF_INTEL_SSSE3) != 0) | |||||
hd->bctx.bwrite = do_sha256_transform_amd64_ssse3; | |||||
#endif | |||||
#ifdef USE_AVX | |||||
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. | |||||
* Therefore use this implementation on Intel CPUs only. */ | |||||
if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD)) | |||||
hd->bctx.bwrite = do_sha256_transform_amd64_avx; | |||||
#endif | |||||
#ifdef USE_AVX2 | |||||
if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2)) | |||||
hd->bctx.bwrite = do_sha256_transform_amd64_avx2; | |||||
#endif | |||||
#ifdef USE_SHAEXT | |||||
if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1)) | |||||
hd->bctx.bwrite = do_sha256_transform_intel_shaext; | |||||
#endif | |||||
#ifdef USE_ARM_CE | |||||
if ((features & HWF_ARM_SHA2) != 0) | |||||
hd->bctx.bwrite = do_sha256_transform_armv8_ce; | |||||
#endif | |||||
(void)features; | |||||
} | } | ||||
/* | /* | ||||
Transform the message X which consists of 16 32-bit-words. See FIPS | Transform the message X which consists of 16 32-bit-words. See FIPS | ||||
180-2 for details. */ | 180-2 for details. */ | ||||
#define R(a,b,c,d,e,f,g,h,k,w) do \ | #define R(a,b,c,d,e,f,g,h,k,w) do \ | ||||
{ \ | { \ | ||||
▲ Show 20 Lines • Show All 462 Lines • Show Last 20 Lines |