Index: cipher/Makefile.am
===================================================================
--- cipher/Makefile.am
+++ cipher/Makefile.am
@@ -117,6 +117,9 @@
 	sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
 	sha512-avx2-bmi2-amd64.S \
 	sha512-armv7-neon.S sha512-arm.S \
+	sha512-ppc8.pl \
+	sha512-ppc8.S sha512-ppc8be.S sha512-ppc832.S \
+	sha256-ppc8.S sha256-ppc8be.S sha256-ppc832.S \
 	sm3.c \
 	keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
 	stribog.c \
Index: cipher/sha2-common.h
===================================================================
--- /dev/null
+++ cipher/sha2-common.h
@@ -0,0 +1,94 @@
+/* SHA2 for GnuPG
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ *               2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_SHA2_COMMON_H
+#define G10_SHA2_COMMON_H
+
+/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
+#undef USE_ARM_NEON_ASM
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_ARM_NEON_ASM 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+#endif
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* USE_PPC_ASM indicates whether to compile with PowerISA 2.07 crypto support */
+#undef USE_PPC_ASM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(__powerpc64__) || defined(__powerpc__)
+#  define USE_PPC_ASM 1
+# endif
+#endif
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+    defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+#endif
\ No newline at end of file
Index: cipher/sha256.c
===================================================================
--- cipher/sha256.c
+++ cipher/sha256.c
@@ -45,82 +45,21 @@
 #include "bufhelp.h"
 #include "cipher.h"
 #include "hash-common.h"
+#include "sha2-common.h"
-
-/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
-#undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_SSSE3 1
-#endif
-
-/* USE_AVX indicates whether to compile with Intel AVX code. */
-#undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_AVX 1
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
 #endif
 
-/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
-#undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
-    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_AVX2 1
-#endif
-
-/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
-#undef USE_SHAEXT
-#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
-    defined(HAVE_GCC_INLINE_ASM_SSE41) && \
-    defined(ENABLE_SHAEXT_SUPPORT)
-# define USE_SHAEXT 1
-#endif
-
-/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
- * code. */
-#undef USE_ARM_CE
-#ifdef ENABLE_ARM_CRYPTO_SUPPORT
-# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
-     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
-     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
-#  define USE_ARM_CE 1
-# elif defined(__AARCH64EL__) \
-       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
-       && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
-#  define USE_ARM_CE 1
-# endif
-#endif
-
-
 typedef struct {
   gcry_md_block_ctx_t bctx;
-  u32 h0,h1,h2,h3,h4,h5,h6,h7;
+  u32 h0 ATTR_ALIGNED_16;
+  u32 h1,h2,h3,h4,h5,h6,h7;
 } SHA256_CONTEXT;
-
-/* Assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
-    defined(USE_SHAEXT)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-#  define ASM_FUNC_ABI __attribute__((sysv_abi))
-#  define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
-# else
-#  define ASM_FUNC_ABI
-#  define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
 #ifdef USE_SSSE3
 unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
                                                 u32 state[8],
@@ -196,33 +135,31 @@
 }
 #endif
 
+#ifdef USE_PPC_ASM
+void sha256_block_p8 (u32 state[8],
+                      const unsigned char *data,
+                      size_t len);
+static unsigned int
+do_sha256_transform_ppc8 (void *ctx, const unsigned char *data,
+                          size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  sha256_block_p8 (&hd->h0, data, nblks);
+  return 128; /* uses 128 bytes of stack space */
+}
+#endif
 
 static unsigned int
 do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
 
-
 static void
-sha256_init (void *context, unsigned int flags)
+sha256_init_common (void *context, unsigned int flags)
 {
   SHA256_CONTEXT *hd = context;
   unsigned int features = _gcry_get_hw_features ();
 
   (void)flags;
 
-  hd->h0 = 0x6a09e667;
-  hd->h1 = 0xbb67ae85;
-  hd->h2 = 0x3c6ef372;
-  hd->h3 = 0xa54ff53a;
-  hd->h4 = 0x510e527f;
-  hd->h5 = 0x9b05688c;
-  hd->h6 = 0x1f83d9ab;
-  hd->h7 = 0x5be0cd19;
-
-  hd->bctx.nblocks = 0;
-  hd->bctx.nblocks_high = 0;
-  hd->bctx.count = 0;
-  hd->bctx.blocksize = 64;
-
   /* Order of feature checks is important here; last match will be
    * selected.  Keep slower implementations at the top and faster at
    * the bottom.  */
@@ -248,16 +185,43 @@
 #ifdef USE_ARM_CE
   if ((features & HWF_ARM_SHA2) != 0)
     hd->bctx.bwrite = do_sha256_transform_armv8_ce;
+#endif
+#ifdef USE_PPC_ASM
+  if ((features & HWF_PPC_VCRYPTO) != 0)
+    hd->bctx.bwrite = do_sha256_transform_ppc8;
 #endif
 
   (void)features;
 }
 
+static void
+sha256_init (void *context, unsigned int flags)
+{
+  SHA256_CONTEXT *hd = context;
+
+  (void)flags;
+
+  hd->h0 = 0x6a09e667;
+  hd->h1 = 0xbb67ae85;
+  hd->h2 = 0x3c6ef372;
+  hd->h3 = 0xa54ff53a;
+  hd->h4 = 0x510e527f;
+  hd->h5 = 0x9b05688c;
+  hd->h6 = 0x1f83d9ab;
+  hd->h7 = 0x5be0cd19;
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize = 64;
+
+  sha256_init_common (context, flags);
+}
+
 static void
 sha224_init (void *context, unsigned int flags)
 {
   SHA256_CONTEXT *hd = context;
-  unsigned int features = _gcry_get_hw_features ();
 
   (void)flags;
 
@@ -275,33 +239,7 @@
   hd->bctx.count = 0;
   hd->bctx.blocksize = 64;
 
-  /* Order of feature checks is important here; last match will be
-   * selected.  Keep slower implementations at the top and faster at
-   * the bottom.  */
-  hd->bctx.bwrite = do_transform_generic;
-#ifdef USE_SSSE3
-  if ((features & HWF_INTEL_SSSE3) != 0)
-    hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
-#endif
-#ifdef USE_AVX
-  /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
-   * Therefore use this implementation on Intel CPUs only. */
-  if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
-    hd->bctx.bwrite = do_sha256_transform_amd64_avx;
-#endif
-#ifdef USE_AVX2
-  if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
-    hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
-#endif
-#ifdef USE_SHAEXT
-  if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
-    hd->bctx.bwrite = do_sha256_transform_intel_shaext;
-#endif
-#ifdef USE_ARM_CE
-  if ((features & HWF_ARM_SHA2) != 0)
-    hd->bctx.bwrite = do_sha256_transform_armv8_ce;
-#endif
-  (void)features;
+  sha256_init_common (context, flags);
 }
Index: cipher/sha512-ppc8.pl
===================================================================
--- cipher/sha512-ppc8.pl
+++ cipher/sha512-ppc8.pl
@@ -1,5 +1,7 @@
 #! /usr/bin/env perl
 # SPDX-License-Identifier: BSD-3-Clause
+#
+#Changes: rename ppc-xlate.pl
 
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -54,6 +56,7 @@
 $LENDIAN=($flavour=~/le/);
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}asm-common-ppc.pl" and -f $xlate ) or
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 die "can't locate ppc-xlate.pl";
Index: cipher/sha512.c
===================================================================
--- cipher/sha512.c
+++ cipher/sha512.c
@@ -53,55 +53,7 @@
 #include "bufhelp.h"
 #include "cipher.h"
 #include "hash-common.h"
-
-
-/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
-#undef USE_ARM_NEON_ASM
-#ifdef ENABLE_NEON_SUPPORT
-# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
-     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
-     && defined(HAVE_GCC_INLINE_ASM_NEON)
-#  define USE_ARM_NEON_ASM 1
-# endif
-#endif /*ENABLE_NEON_SUPPORT*/
-
-
-/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
-#undef USE_ARM_ASM
-#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
-# define USE_ARM_ASM 1
-#endif
-
-
-/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
-#undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_SSSE3 1
-#endif
-
-
-/* USE_AVX indicates whether to compile with Intel AVX code. */
-#undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_AVX 1
-#endif
-
-
-/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
-#undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
-    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
-     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
-# define USE_AVX2 1
-#endif
+#include "sha2-common.h"
 
 
 typedef struct
@@ -160,22 +112,6 @@
     U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
   };
 
-
-/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
- * stack to store XMM6-XMM15 needed on Win64. */
-#undef ASM_FUNC_ABI
-#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
-# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-#  define ASM_FUNC_ABI __attribute__((sysv_abi))
-#  define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
-# else
-#  define ASM_FUNC_ABI
-#  define ASM_EXTRA_STACK 0
-# endif
-#endif
-
-
 #ifdef USE_ARM_NEON_ASM
 unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
                                                 const unsigned char *data,
@@ -190,6 +126,20 @@
 }
 #endif
 
+#ifdef USE_PPC_ASM
+void sha512_block_p8 (SHA512_STATE *hd,
+                      const unsigned char *data,
+                      size_t len);
+static unsigned int
+do_sha512_transform_ppc8 (void *ctx, const unsigned char *data,
+                          size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  sha512_block_p8 (&hd->state, data, nblks);
+  return 128; /* uses 128 bytes of stack space */
+}
+#endif
+
 #ifdef USE_SSSE3
 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
                                                 void *state,
@@ -274,6 +224,10 @@
   if ((features & HWF_ARM_NEON) != 0)
     ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
 #endif
+#ifdef USE_PPC_ASM
+  if ((features & HWF_PPC_VCRYPTO) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_ppc8;
+#endif
 #ifdef USE_SSSE3
   if ((features & HWF_INTEL_SSSE3) != 0)
     ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
Index: configure.ac
===================================================================
--- configure.ac
+++ configure.ac
@@ -2241,16 +2241,19 @@
       powerpc64le-*-*)
          # Build with the crypto extension implementation
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8.lo"
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc8.lo sha256-ppc8.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8be.lo"
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc8be.lo sha256-ppc8be.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc832.lo"
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc832.lo sha256-ppc832.lo"
       ;;
    esac