diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 8c7ec095..ada89418 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,331 +1,332 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. 
AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box$(EXEEXT_FOR_BUILD) DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c \ cipher-cfb.c \ cipher-ofb.c \ cipher-ctr.c \ cipher-aeswrap.c \ cipher-ccm.c \ cipher-cmac.c \ cipher-gcm.c \ cipher-poly1305.c \ cipher-ocb.c \ cipher-xts.c \ cipher-eax.c \ cipher-siv.c \ cipher-gcm-siv.c \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ bithelp.h \ bufhelp.h \ bulkhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ asm-common-aarch64.h \ asm-common-amd64.h \ asm-common-s390x.h \ asm-inline-s390x.h \ asm-poly1305-aarch64.h \ asm-poly1305-amd64.h \ asm-poly1305-s390x.h \ aria.c aria-aesni-avx-amd64.S aria-aesni-avx2-amd64.S \ aria-gfni-avx512-amd64.S \ arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ chacha20-amd64-avx512.S chacha20-armv7-neon.S chacha20-aarch64.S \ chacha20-ppc.c chacha20-s390x.S \ chacha20-p10le-8x.s \ cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ crc.c crc-intel-pclmul.c crc-armv8-ce.c \ crc-armv8-aarch64-ce.S \ crc-ppc.c \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ 
poly1305-s390x.S poly1305-amd64-avx512.S \ poly1305-p10le.s \ rijndael.c rijndael-internal.h rijndael-tables.h \ rijndael-aesni.c rijndael-padlock.c \ rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-vaes.c rijndael-vaes-avx2-amd64.S \ + rijndael-vaes-i386.c rijndael-vaes-avx2-i386.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ rijndael-ppc.c rijndael-ppc9le.c \ - rijndael-p10le.c rijndael-gcm-p10le.s \ + rijndael-p10le.c rijndael-gcm-p10le.s \ rijndael-ppc-common.h rijndael-ppc-functions.h \ rijndael-s390x.c \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S \ serpent-avx512-x86.c serpent-armv7-neon.S \ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \ sm4-gfni-avx2-amd64.S sm4-gfni-avx512-amd64.S \ sm4-aarch64.S sm4-armv8-aarch64-ce.S sm4-armv9-aarch64-sve-ce.S \ sm4-ppc.c \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha256-intel-shaext.c sha256-ppc.c \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \ sha512-avx2-bmi2-amd64.S sha512-avx512-amd64.S \ sha512-armv7-neon.S sha512-armv8-aarch64-ce.S sha512-arm.S \ sha512-ppc.c sha512-ssse3-i386.c \ sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S sm3-armv8-aarch64-ce.S \ keccak.c keccak_permute_32.h keccak_permute_64.h \ keccak-armv7-neon.S keccak-amd64-avx512.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.h \ camellia-gfni-avx2-amd64.S camellia-gfni-avx512-amd64.S \ 
camellia-vaes-avx2-amd64.S camellia-aesni-avx2-amd64.S \ camellia-arm.S camellia-aarch64.S camellia-aarch64-ce.c \ camellia-simd128.h camellia-ppc8le.c camellia-ppc9le.c \ blake2.c \ blake2b-amd64-avx2.S blake2b-amd64-avx512.S \ blake2s-amd64-avx.S blake2s-amd64-avx512.S gost28147.lo: gost-sb.h gost-sb.h: gost-s-box$(EXEEXT_FOR_BUILD) ./gost-s-box$(EXEEXT_FOR_BUILD) $@ gost-s-box$(EXEEXT_FOR_BUILD): gost-s-box.c $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9sgz][2-9sgz]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. tiger.o: $(srcdir)/tiger.c Makefile `echo $(COMPILE) -c $< | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c Makefile `echo $(LTCOMPILE) -c $< | $(o_flag_munging) ` # We need to disable instrumentation for these modules as they use cc as # thin assembly front-end and do not tolerate in-between function calls # inserted by compiler as those functions may clobber the XMM registers. 
if ENABLE_INSTRUMENTATION_MUNGING instrumentation_munging = sed \ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' else instrumentation_munging = cat endif rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS ppc_vcrypto_cflags = -O2 -maltivec -mvsx -mcrypto else 
ppc_vcrypto_cflags = endif if ENABLE_AARCH64_NEON_INTRINSICS_EXTRA_CFLAGS aarch64_neon_cflags = -O2 -march=armv8-a+crypto else aarch64_neon_cflags = endif rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.o: $(srcdir)/rijndael-p10le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.lo: $(srcdir)/rijndael-p10le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.o: $(srcdir)/crc-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.o: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(COMPILE) 
$(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.lo: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` camellia-ppc8le.o: $(srcdir)/camellia-ppc8le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` camellia-ppc8le.lo: $(srcdir)/camellia-ppc8le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` camellia-ppc9le.o: $(srcdir)/camellia-ppc9le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` camellia-ppc9le.lo: $(srcdir)/camellia-ppc9le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` camellia-aarch64-ce.o: $(srcdir)/camellia-aarch64-ce.c Makefile `echo $(COMPILE) $(aarch64_neon_cflags) -c $< | $(instrumentation_munging) ` camellia-aarch64-ce.lo: $(srcdir)/camellia-aarch64-ce.c Makefile `echo $(LTCOMPILE) $(aarch64_neon_cflags) -c $< | $(instrumentation_munging) ` sm4-ppc.o: $(srcdir)/sm4-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sm4-ppc.lo: $(srcdir)/sm4-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` if ENABLE_X86_AVX512_INTRINSICS_EXTRA_CFLAGS avx512f_cflags = -mavx512f else avx512f_cflags = endif serpent-avx512-x86.o: $(srcdir)/serpent-avx512-x86.c Makefile `echo $(COMPILE) $(avx512f_cflags) -c $< | $(instrumentation_munging) ` serpent-avx512-x86.lo: $(srcdir)/serpent-avx512-x86.c Makefile `echo $(LTCOMPILE) $(avx512f_cflags) -c $< | $(instrumentation_munging) ` diff --git a/cipher/asm-common-i386.h b/cipher/asm-common-i386.h new file mode 100644 index 00000000..d746ebc4 --- /dev/null +++ b/cipher/asm-common-i386.h @@ -0,0 +1,161 @@ +/* asm-common-i386.h - Common macros for i386 assembly + * + * Copyright (C) 2023 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <https://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_ASM_COMMON_I386_H +#define GCRY_ASM_COMMON_I386_H + +#include <config.h> + +#ifdef HAVE_COMPATIBLE_GCC_I386_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#ifdef HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS +# define SECTION_RODATA .section .rdata +#else +# define SECTION_RODATA .section .rodata +#endif + +#ifdef HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS +# define SYM_NAME(name) _##name +#else +# define SYM_NAME(name) name +#endif + +#ifdef HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS +# define DECL_GET_PC_THUNK(reg) +# define GET_DATA_POINTER(name, reg) leal name, %reg +#else +# define DECL_GET_PC_THUNK(reg) \ + .type __gcry_get_pc_thunk_##reg, @function; \ + .align 16; \ + __gcry_get_pc_thunk_##reg:; \ + CFI_STARTPROC(); \ + movl (%esp), %reg; \ + ret_spec_stop; \ + CFI_ENDPROC() +# define GET_DATA_POINTER(name, reg) \ + call __gcry_get_pc_thunk_##reg; \ + addl $_GLOBAL_OFFSET_TABLE_, %reg; \ + movl name##@GOT(%reg), %reg; +#endif + +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +/* CFI directives to emit DWARF stack unwinding information.
*/ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_REMEMBER_STATE() .cfi_remember_state +# define CFI_RESTORE_STATE() .cfi_restore_state +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_REGISTER(ro,rn) .cfi_register ro, rn +# define CFI_RESTORE(reg) .cfi_restore reg + +# define CFI_PUSH(reg) \ + CFI_ADJUST_CFA_OFFSET(4); CFI_REL_OFFSET(reg, 0) +# define CFI_POP(reg) \ + CFI_ADJUST_CFA_OFFSET(-4); CFI_RESTORE(reg) +# define CFI_POP_TMP_REG() \ + CFI_ADJUST_CFA_OFFSET(-4); +# define CFI_LEAVE() \ + CFI_ADJUST_CFA_OFFSET(-4); CFI_DEF_CFA_REGISTER(%esp) + +/* CFA expressions are used for pointing CFA and registers to + * %esp relative offsets. */ +# define DW_REGNO_eax 0 +# define DW_REGNO_edx 1 +# define DW_REGNO_ecx 2 +# define DW_REGNO_ebx 3 +# define DW_REGNO_esi 4 +# define DW_REGNO_edi 5 +# define DW_REGNO_ebp 6 +# define DW_REGNO_esp 7 + +# define DW_REGNO(reg) DW_REGNO_ ## reg + +/* Fixed length encoding used for integers for now.
*/ +# define DW_SLEB128_7BIT(value) \ + 0x00|((value) & 0x7f) +# define DW_SLEB128_28BIT(value) \ + 0x80|((value)&0x7f), \ + 0x80|(((value)>>7)&0x7f), \ + 0x80|(((value)>>14)&0x7f), \ + 0x00|(((value)>>21)&0x7f) + +# define CFI_CFA_ON_STACK(esp_offs,cfa_depth) \ + .cfi_escape \ + 0x0f, /* DW_CFA_def_cfa_expression */ \ + DW_SLEB128_7BIT(11), /* length */ \ + 0x77, /* DW_OP_breg7, esp + constant */ \ + DW_SLEB128_28BIT(esp_offs), \ + 0x06, /* DW_OP_deref */ \ + 0x23, /* DW_OP_plus_constu */ \ + DW_SLEB128_28BIT((cfa_depth)+4) + +# define CFI_REG_ON_STACK(reg,esp_offs) \ + .cfi_escape \ + 0x10, /* DW_CFA_expression */ \ + DW_SLEB128_7BIT(DW_REGNO(reg)), \ + DW_SLEB128_7BIT(5), /* length */ \ + 0x77, /* DW_OP_breg7, esp + constant */ \ + DW_SLEB128_28BIT(esp_offs) + +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_REMEMBER_STATE() +# define CFI_RESTORE_STATE() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_REGISTER(ro,rn) +# define CFI_RESTORE(reg) + +# define CFI_PUSH(reg) +# define CFI_POP(reg) +# define CFI_POP_TMP_REG() +# define CFI_LEAVE() + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) +# define CFI_REG_ON_STACK(reg,rsp_offs) +#endif + +/* 'ret' instruction replacement for straight-line speculation mitigation. */ +#define ret_spec_stop \ + ret; int3; + +/* This prevents speculative execution on old AVX512 CPUs, to prevent + * speculative execution to AVX512 code. The vpopcntb instruction is + * available on newer CPUs that do not suffer from significant frequency + * drop when 512-bit vectors are utilized. */ +#define spec_stop_avx512 \ + vpxord %ymm7, %ymm7, %ymm7; \ + vpopcntb %xmm7, %xmm7; /* Supported only by newer AVX512 CPUs. */ \ + vpxord %ymm7, %ymm7, %ymm7; + +#define spec_stop_avx512_intel_syntax \ + vpxord ymm7, ymm7, ymm7; \ + vpopcntb xmm7, xmm7; /* Supported only by newer AVX512 CPUs.
*/ \ + vpxord ymm7, ymm7, ymm7; + +#endif /* GCRY_ASM_COMMON_AMD64_H */ diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 52c892fd..166f2415 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -1,206 +1,216 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef G10_RIJNDAEL_INTERNAL_H #define G10_RIJNDAEL_INTERNAL_H #include "types.h" /* for byte and u32 typedefs */ #define MAXKC (256/32) #define MAXROUNDS 14 #define BLOCKSIZE (128/8) /* Helper macro to force alignment to 16 or 64 bytes. */ #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED # define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) # define ATTR_ALIGNED_64 __attribute__ ((aligned (64))) #else # define ATTR_ALIGNED_16 # define ATTR_ALIGNED_64 #endif /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ #undef USE_AMD64_ASM #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AMD64_ASM 1 #endif /* USE_SSSE3 indicates whether to use SSSE3 code. 
*/ #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_ARM_ASM indicates whether to use ARM assembly code. */ #undef USE_ARM_ASM #if defined(__ARMEL__) # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS # define USE_ARM_ASM 1 # endif #endif #if defined(__AARCH64EL__) # ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS # define USE_ARM_ASM 1 # endif #endif /* USE_PADLOCK indicates whether to compile the padlock specific code. */ #undef USE_PADLOCK #ifdef ENABLE_PADLOCK_SUPPORT # ifdef HAVE_GCC_ATTRIBUTE_ALIGNED # if (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) # define USE_PADLOCK 1 # endif # endif #endif /* ENABLE_PADLOCK_SUPPORT */ /* USE_AESNI inidicates whether to compile with Intel AES-NI code. We need the vector-size attribute which seems to be available since gcc 3. However, to be on the safe side we require at least gcc 4. */ #undef USE_AESNI #ifdef ENABLE_AESNI_SUPPORT # if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) # if __GNUC__ >= 4 # define USE_AESNI 1 # endif # endif #endif /* ENABLE_AESNI_SUPPORT */ -/* USE_VAES inidicates whether to compile with Intel VAES code. */ +/* USE_VAES inidicates whether to compile with AMD64 VAES code. */ #undef USE_VAES #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(__x86_64__) && defined(ENABLE_AVX2_SUPPORT) && \ defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) && \ defined(USE_AESNI) # define USE_VAES 1 #endif +/* USE_VAES_I386 inidicates whether to compile with i386 VAES code. 
*/ +#undef USE_VAES_I386 +#if (defined(HAVE_COMPATIBLE_GCC_I386_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS)) && \ + defined(__i386__) && defined(ENABLE_AVX2_SUPPORT) && \ + defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) && \ + defined(USE_AESNI) +# define USE_VAES_I386 1 +#endif + /* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly * code. */ #undef USE_ARM_CE #ifdef ENABLE_ARM_CRYPTO_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) # define USE_ARM_CE 1 # elif defined(__AARCH64EL__) \ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) # define USE_ARM_CE 1 # endif #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ /* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto * accelerated code. USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to * enable POWER9 optimized variant. */ #undef USE_PPC_CRYPTO #undef USE_PPC_CRYPTO_WITH_PPC9LE #ifdef ENABLE_PPC_CRYPTO_SUPPORT # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) # if __GNUC__ >= 4 # define USE_PPC_CRYPTO 1 # if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00) # define USE_PPC_CRYPTO_WITH_PPC9LE 1 # endif # endif # endif #endif /* ENABLE_PPC_CRYPTO_SUPPORT */ /* USE_S390X_CRYPTO indicates whether to enable zSeries code. */ #undef USE_S390X_CRYPTO #if defined(HAVE_GCC_INLINE_ASM_S390X) # define USE_S390X_CRYPTO 1 #endif /* USE_S390X_CRYPTO */ struct RIJNDAEL_context_s; typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx, unsigned char *bx, const unsigned char *ax); typedef void (*rijndael_prefetchfn_t)(void); typedef void (*rijndael_prepare_decfn_t)(struct RIJNDAEL_context_s *ctx); /* Our context object. */ typedef struct RIJNDAEL_context_s { /* The first fields are the keyschedule arrays. 
This is so that they are aligned on a 16 byte boundary if using gcc. This alignment is required for the AES-NI code and a good idea in any case. The alignment is guaranteed due to the way cipher.c allocates the space for the context. The PROPERLY_ALIGNED_TYPE hack is used to force a minimal alignment if not using gcc of if the alignment requirement is higher that 16 bytes. */ union { PROPERLY_ALIGNED_TYPE dummy; byte keyschedule[MAXROUNDS+1][4][4]; u32 keyschedule32[MAXROUNDS+1][4]; u32 keyschedule32b[(MAXROUNDS+1)*4]; #ifdef USE_PADLOCK /* The key as passed to the padlock engine. It is only used if the padlock engine is used (USE_PADLOCK, below). */ unsigned char padlock_key[16] __attribute__ ((aligned (16))); #endif /*USE_PADLOCK*/ } u1; union { PROPERLY_ALIGNED_TYPE dummy; byte keyschedule[MAXROUNDS+1][4][4]; u32 keyschedule32[MAXROUNDS+1][4]; } u2; int rounds; /* Key-length-dependent number of rounds. */ unsigned int decryption_prepared:1; /* The decryption key schedule is available. */ #ifdef USE_AESNI unsigned int use_avx:1; /* AVX shall be used by AES-NI implementation. */ unsigned int use_avx2:1; /* AVX2 shall be used by AES-NI implementation. */ #endif /*USE_AESNI*/ #ifdef USE_S390X_CRYPTO byte km_func; byte km_func_xts; byte kmc_func; byte kmac_func; byte kmf_func; byte kmo_func; byte kma_func; #endif /*USE_S390X_CRYPTO*/ rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; rijndael_prefetchfn_t prefetch_dec_fn; rijndael_prepare_decfn_t prepare_decryption; } RIJNDAEL_context ATTR_ALIGNED_16; /* Macros defining alias for the keyschedules. 
*/ #define keyschenc u1.keyschedule #define keyschenc32 u1.keyschedule32 #define keyschenc32b u1.keyschedule32b #define keyschdec u2.keyschedule #define keyschdec32 u2.keyschedule32 #define padlockkey u1.padlock_key #endif /* G10_RIJNDAEL_INTERNAL_H */ diff --git a/cipher/rijndael-vaes-avx2-i386.S b/cipher/rijndael-vaes-avx2-i386.S new file mode 100644 index 00000000..245e8443 --- /dev/null +++ b/cipher/rijndael-vaes-avx2-i386.S @@ -0,0 +1,2804 @@ +/* VAES/AVX2 i386 accelerated AES for Libgcrypt + * Copyright (C) 2023 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#if defined(__i386__) +#include +#if (defined(HAVE_COMPATIBLE_GCC_I386_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS)) && \ + defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) && \ + defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) + +#include "asm-common-i386.h" + +.text + +DECL_GET_PC_THUNK(eax); + +/********************************************************************** + helper macros + **********************************************************************/ +#define AES_OP4(op, key, b0, b1, b2, b3) \ + op key, b0, b0; \ + op key, b1, b1; \ + op key, b2, b2; \ + op key, b3, b3; + +#define VAESENC4(key, b0, b1, b2, b3) \ + AES_OP4(vaesenc, key, b0, b1, b2, b3) + +#define VAESDEC4(key, b0, b1, b2, b3) \ + AES_OP4(vaesdec, key, b0, b1, b2, b3) + +#define XOR4(key, b0, b1, b2, b3) \ + AES_OP4(vpxor, key, b0, b1, b2, b3) + +#define AES_OP2(op, key, b0, b1) \ + op key, b0, b0; \ + op key, b1, b1; + +#define VAESENC2(key, b0, b1) \ + AES_OP2(vaesenc, key, b0, b1) + +#define VAESDEC2(key, b0, b1) \ + AES_OP2(vaesdec, key, b0, b1) + +#define XOR2(key, b0, b1) \ + AES_OP2(vpxor, key, b0, b1) + +#define VAESENC6(key, b0, b1, b2, b3, b4, b5) \ + AES_OP4(vaesenc, key, b0, b1, b2, b3); \ + AES_OP2(vaesenc, key, b4, b5) + +#define VAESDEC6(key, b0, b1, b2, b3, b4, b5) \ + AES_OP4(vaesdec, key, b0, b1, b2, b3); \ + AES_OP2(vaesdec, key, b4, b5) + +#define XOR6(key, b0, b1, b2, b3, b4, b5) \ + AES_OP4(vpxor, key, b0, b1, b2, b3); \ + AES_OP2(vpxor, key, b4, b5) + +#define CADDR(name, reg) \ + (name - SYM_NAME(_gcry_vaes_consts))(reg) + +/********************************************************************** + CBC-mode decryption + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_cbc_dec_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_cbc_dec_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_cbc_dec_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): iv + * (esp + 12): dst + * (esp + 
16): src + * (esp + 20): nblocks + * (esp + 24): nrounds + */ + CFI_STARTPROC(); + pushl %edi; + CFI_PUSH(%edi); + pushl %esi; + CFI_PUSH(%esi); + + movl 8+4(%esp), %edi; + movl 8+8(%esp), %esi; + movl 8+12(%esp), %edx; + movl 8+16(%esp), %ecx; + movl 8+20(%esp), %eax; + + /* Process 8 blocks per loop. */ +.align 8 +.Lcbc_dec_blk8: + cmpl $8, %eax; + jb .Lcbc_dec_blk4; + + leal -8(%eax), %eax; + + /* Load input and xor first key. Update IV. */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm0; + vmovdqu (2 * 16)(%ecx), %ymm1; + vmovdqu (4 * 16)(%ecx), %ymm2; + vmovdqu (6 * 16)(%ecx), %ymm3; + vmovdqu (%esi), %xmm6; /* Load IV. */ + vinserti128 $1, %xmm0, %ymm6, %ymm5; + vextracti128 $1, %ymm3, (%esi); /* Store IV. */ + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vpxor %ymm4, %ymm2, %ymm2; + vpxor %ymm4, %ymm3, %ymm3; + vmovdqu (1 * 16)(%ecx), %ymm6; + vmovdqu (3 * 16)(%ecx), %ymm7; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lcbc_dec_blk8_last; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (12 * 16)(%edi), 
%ymm4; + jz .Lcbc_dec_blk8_last; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. */ + .Lcbc_dec_blk8_last: + vpxor %ymm4, %ymm5, %ymm5; + vpxor %ymm4, %ymm6, %ymm6; + vpxor %ymm4, %ymm7, %ymm7; + vpxor (5 * 16)(%ecx), %ymm4, %ymm4; + leal (8 * 16)(%ecx), %ecx; + vaesdeclast %ymm5, %ymm0, %ymm0; + vaesdeclast %ymm6, %ymm1, %ymm1; + vaesdeclast %ymm7, %ymm2, %ymm2; + vaesdeclast %ymm4, %ymm3, %ymm3; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + leal (8 * 16)(%edx), %edx; + + jmp .Lcbc_dec_blk8; + + /* Handle trailing four blocks. */ +.align 8 +.Lcbc_dec_blk4: + cmpl $4, %eax; + jb .Lcbc_dec_blk1; + + leal -4(%eax), %eax; + + /* Load input and xor first key. Update IV. */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm0; + vmovdqu (2 * 16)(%ecx), %ymm1; + vmovdqu (%esi), %xmm6; /* Load IV. */ + vinserti128 $1, %xmm0, %ymm6, %ymm5; + vextracti128 $1, %ymm1, (%esi); /* Store IV. 
*/ + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vmovdqu (1 * 16)(%ecx), %ymm6; + leal (4 * 16)(%ecx), %ecx; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lcbc_dec_blk4_last; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lcbc_dec_blk4_last; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. */ + .Lcbc_dec_blk4_last: + vpxor %ymm4, %ymm5, %ymm5; + vpxor %ymm4, %ymm6, %ymm6; + vaesdeclast %ymm5, %ymm0, %ymm0; + vaesdeclast %ymm6, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Lcbc_dec_blk1: + cmpl $1, %eax; + jb .Ldone_cbc_dec; + + leal -1(%eax), %eax; + + /* Load input. */ + vmovdqu (%ecx), %xmm2; + leal 16(%ecx), %ecx; + + /* Xor first key. */ + vpxor (0 * 16)(%edi), %xmm2, %xmm0; + + /* AES rounds. 
*/ + vaesdec (1 * 16)(%edi), %xmm0, %xmm0; + vaesdec (2 * 16)(%edi), %xmm0, %xmm0; + vaesdec (3 * 16)(%edi), %xmm0, %xmm0; + vaesdec (4 * 16)(%edi), %xmm0, %xmm0; + vaesdec (5 * 16)(%edi), %xmm0, %xmm0; + vaesdec (6 * 16)(%edi), %xmm0, %xmm0; + vaesdec (7 * 16)(%edi), %xmm0, %xmm0; + vaesdec (8 * 16)(%edi), %xmm0, %xmm0; + vaesdec (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 8+24(%esp); + jb .Lcbc_dec_blk1_last; + vaesdec %xmm1, %xmm0, %xmm0; + vaesdec (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lcbc_dec_blk1_last; + vaesdec %xmm1, %xmm0, %xmm0; + vaesdec (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + + /* Last round and output handling. */ + .Lcbc_dec_blk1_last: + vpxor (%esi), %xmm1, %xmm1; + vaesdeclast %xmm1, %xmm0, %xmm0; + vmovdqu %xmm2, (%esi); + vmovdqu %xmm0, (%edx); + leal 16(%edx), %edx; + + jmp .Lcbc_dec_blk1; + +.align 8 +.Ldone_cbc_dec: + popl %esi; + CFI_POP(%esi); + popl %edi; + CFI_POP(%edi); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_cbc_dec_i386), + .-SYM_NAME(_gcry_vaes_avx2_cbc_dec_i386)) + +/********************************************************************** + CFB-mode decryption + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_cfb_dec_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_cfb_dec_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_cfb_dec_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): iv + * (esp + 12): dst + * (esp + 16): src + * (esp + 20): nblocks + * (esp + 24): nrounds + */ + CFI_STARTPROC(); + pushl %edi; + CFI_PUSH(%edi); + pushl %esi; + CFI_PUSH(%esi); + + movl 8+4(%esp), %edi; + movl 8+8(%esp), %esi; + movl 8+12(%esp), %edx; + movl 8+16(%esp), %ecx; + movl 8+20(%esp), %eax; + + /* Process 8 blocks per loop. */ +.align 8 +.Lcfb_dec_blk8: + cmpl $8, %eax; + jb .Lcfb_dec_blk4; + + leal -8(%eax), %eax; + + /* Load IV. 
*/ + vmovdqu (%esi), %xmm0; + + /* Load input and xor first key. Update IV. */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm5; + vinserti128 $1, %xmm5, %ymm0, %ymm0; + vmovdqu (1 * 16)(%ecx), %ymm1; + vmovdqu (3 * 16)(%ecx), %ymm2; + vmovdqu (5 * 16)(%ecx), %ymm3; + vmovdqu (7 * 16)(%ecx), %xmm6; + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vpxor %ymm4, %ymm2, %ymm2; + vpxor %ymm4, %ymm3, %ymm3; + vbroadcasti128 (1 * 16)(%edi), %ymm4; + vmovdqu %xmm6, (%esi); /* Store IV. */ + vmovdqu (2 * 16)(%ecx), %ymm6; + vmovdqu (4 * 16)(%ecx), %ymm7; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lcfb_dec_blk8_last; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lcfb_dec_blk8_last; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. 
*/ + .Lcfb_dec_blk8_last: + vpxor %ymm4, %ymm5, %ymm5; + vpxor %ymm4, %ymm6, %ymm6; + vpxor %ymm4, %ymm7, %ymm7; + vpxor (6 * 16)(%ecx), %ymm4, %ymm4; + leal (8 * 16)(%ecx), %ecx; + vaesenclast %ymm5, %ymm0, %ymm0; + vaesenclast %ymm6, %ymm1, %ymm1; + vaesenclast %ymm7, %ymm2, %ymm2; + vaesenclast %ymm4, %ymm3, %ymm3; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + leal (8 * 16)(%edx), %edx; + + jmp .Lcfb_dec_blk8; + + /* Handle trailing four blocks. */ +.align 8 +.Lcfb_dec_blk4: + cmpl $4, %eax; + jb .Lcfb_dec_blk1; + + leal -4(%eax), %eax; + + /* Load IV. */ + vmovdqu (%esi), %xmm0; + + /* Load input and xor first key. Update IV. */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm5; + vinserti128 $1, %xmm5, %ymm0, %ymm0; + vmovdqu (1 * 16)(%ecx), %ymm1; + vmovdqu (3 * 16)(%ecx), %xmm6; + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vbroadcasti128 (1 * 16)(%edi), %ymm4; + vmovdqu %xmm6, (%esi); /* Store IV. 
*/ + vmovdqu (2 * 16)(%ecx), %ymm6; + + leal (4 * 16)(%ecx), %ecx; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lcfb_dec_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lcfb_dec_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. */ + .Lcfb_dec_blk4_last: + vpxor %ymm4, %ymm5, %ymm5; + vpxor %ymm4, %ymm6, %ymm6; + vaesenclast %ymm5, %ymm0, %ymm0; + vaesenclast %ymm6, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Lcfb_dec_blk1: + cmpl $1, %eax; + jb .Ldone_cfb_dec; + + leal -1(%eax), %eax; + + /* Load IV. */ + vmovdqu (%esi), %xmm0; + + /* Xor first key. */ + vpxor (0 * 16)(%edi), %xmm0, %xmm0; + + /* Load input as next IV. */ + vmovdqu (%ecx), %xmm2; + leal 16(%ecx), %ecx; + + /* AES rounds. 
*/ + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + vmovdqu %xmm2, (%esi); /* Store IV. */ + cmpl $12, 8+24(%esp); + jb .Lcfb_dec_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lcfb_dec_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + + /* Last round and output handling. */ + .Lcfb_dec_blk1_last: + vpxor %xmm2, %xmm1, %xmm1; + vaesenclast %xmm1, %xmm0, %xmm0; + vmovdqu %xmm0, (%edx); + leal 16(%edx), %edx; + + jmp .Lcfb_dec_blk1; + +.align 8 +.Ldone_cfb_dec: + popl %esi; + CFI_POP(%esi); + popl %edi; + CFI_POP(%edi); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_cfb_dec_i386), + .-SYM_NAME(_gcry_vaes_avx2_cfb_dec_i386)) + +/********************************************************************** + CTR-mode encryption + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_ctr_enc_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_ctr_enc_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_ctr_enc_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): iv + * (esp + 12): dst + * (esp + 16): src + * (esp + 20): nblocks + * (esp + 24): nrounds + */ + CFI_STARTPROC(); + + GET_DATA_POINTER(SYM_NAME(_gcry_vaes_consts), eax); + + pushl %ebp; + CFI_PUSH(%ebp); + movl %esp, %ebp; + CFI_DEF_CFA_REGISTER(%ebp); + + subl $(3 * 32 + 3 * 4), %esp; + andl $-32, %esp; + + movl %edi, (3 * 32 + 0 * 4)(%esp); + CFI_REG_ON_STACK(edi, 3 * 32 + 0 * 4); + movl %esi, (3 * 32 + 1 * 4)(%esp); + CFI_REG_ON_STACK(esi, 3 * 32 + 1 
* 4); + movl %ebx, (3 * 32 + 2 * 4)(%esp); + CFI_REG_ON_STACK(ebx, 3 * 32 + 2 * 4); + + movl %eax, %ebx; + movl 4+4(%ebp), %edi; + movl 4+8(%ebp), %esi; + movl 4+12(%ebp), %edx; + movl 4+16(%ebp), %ecx; + +#define prepare_ctr_const(minus_one, minus_two) \ + vpcmpeqd minus_one, minus_one, minus_one; \ + vpsrldq $8, minus_one, minus_one; /* 0:-1 */ \ + vpaddq minus_one, minus_one, minus_two; /* 0:-2 */ + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ + vpcmpeqq minus_one, x, tmp1; \ + vpcmpeqq minus_two, x, tmp2; \ + vpor tmp1, tmp2, tmp2; \ + vpsubq minus_two, x, x; \ + vpslldq $8, tmp2, tmp2; \ + vpsubq tmp2, x, x; + +#define handle_ctr_128bit_add(nblks) \ + movl 12(%esi), %eax; \ + bswapl %eax; \ + addl $nblks, %eax; \ + bswapl %eax; \ + movl %eax, 12(%esi); \ + jnc 1f; \ + \ + movl 8(%esi), %eax; \ + bswapl %eax; \ + adcl $0, %eax; \ + bswapl %eax; \ + movl %eax, 8(%esi); \ + \ + movl 4(%esi), %eax; \ + bswapl %eax; \ + adcl $0, %eax; \ + bswapl %eax; \ + movl %eax, 4(%esi); \ + \ + movl 0(%esi), %eax; \ + bswapl %eax; \ + adcl $0, %eax; \ + bswapl %eax; \ + movl %eax, 0(%esi); \ + .align 8; \ + 1:; + + cmpl $12, 4+20(%ebp); + jae .Lctr_enc_blk12_loop; + jmp .Lctr_enc_blk4; + + /* Process 12 blocks per loop. */ +.align 16 +.Lctr_enc_blk12_loop: + subl $12, 4+20(%ebp); + + vbroadcasti128 (%esi), %ymm6; + + /* detect if carry handling is needed */ + movl 12(%esi), %eax; + addl $(12 << 24), %eax; + jc .Lctr_enc_blk12_handle_carry; + movl %eax, 12(%esi); + + .Lctr_enc_blk12_byte_bige_add: + /* Increment counters. 
*/ + vpaddb CADDR(.Lbige_addb_0, %ebx), %ymm6, %ymm0; + vpaddb CADDR(.Lbige_addb_2, %ebx), %ymm6, %ymm1; + vpaddb CADDR(.Lbige_addb_4, %ebx), %ymm6, %ymm2; + vpaddb CADDR(.Lbige_addb_6, %ebx), %ymm6, %ymm3; + vpaddb CADDR(.Lbige_addb_8, %ebx), %ymm6, %ymm5; + vpaddb CADDR(.Lbige_addb_10, %ebx), %ymm6, %ymm6; + + .Lctr_enc_blk12_rounds: + /* AES rounds */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + XOR6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 4+24(%ebp); + jb .Lctr_enc_blk12_last; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lctr_enc_blk12_last; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. 
*/ + .Lctr_enc_blk12_last: + vpxor (0 * 16)(%ecx), %ymm4, %ymm7; /* Xor src to last round key. */ + vaesenclast %ymm7, %ymm0, %ymm0; + vmovdqu %ymm0, (0 * 16)(%edx); + vpxor (2 * 16)(%ecx), %ymm4, %ymm7; + vpxor (4 * 16)(%ecx), %ymm4, %ymm0; + vaesenclast %ymm7, %ymm1, %ymm1; + vaesenclast %ymm0, %ymm2, %ymm2; + vpxor (6 * 16)(%ecx), %ymm4, %ymm7; + vpxor (8 * 16)(%ecx), %ymm4, %ymm0; + vpxor (10 * 16)(%ecx), %ymm4, %ymm4; + leal (12 * 16)(%ecx), %ecx; + vaesenclast %ymm7, %ymm3, %ymm3; + vaesenclast %ymm0, %ymm5, %ymm5; + vaesenclast %ymm4, %ymm6, %ymm6; + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + vmovdqu %ymm5, (8 * 16)(%edx); + vmovdqu %ymm6, (10 * 16)(%edx); + leal (12 * 16)(%edx), %edx; + + cmpl $12, 4+20(%ebp); + jae .Lctr_enc_blk12_loop; + jmp .Lctr_enc_blk4; + + .align 8 + .Lctr_enc_blk12_handle_only_ctr_carry: + handle_ctr_128bit_add(12); + jmp .Lctr_enc_blk12_byte_bige_add; + + .align 8 + .Lctr_enc_blk12_handle_carry: + jz .Lctr_enc_blk12_handle_only_ctr_carry; + /* Increment counters (handle carry). 
*/ + prepare_ctr_const(%ymm4, %ymm7); + vmovdqa CADDR(.Lbswap128_mask, %ebx), %ymm2; + vpshufb %xmm2, %xmm6, %xmm1; /* be => le */ + vmovdqa %xmm1, %xmm0; + inc_le128(%xmm1, %xmm4, %xmm5); + vinserti128 $1, %xmm1, %ymm0, %ymm6; /* ctr: +1:+0 */ + handle_ctr_128bit_add(12); + vpshufb %ymm2, %ymm6, %ymm0; + vmovdqa %ymm0, (0 * 32)(%esp); + add2_le128(%ymm6, %ymm4, %ymm7, %ymm5, %ymm1); /* ctr: +3:+2 */ + vpshufb %ymm2, %ymm6, %ymm0; + vmovdqa %ymm0, (1 * 32)(%esp); + add2_le128(%ymm6, %ymm4, %ymm7, %ymm5, %ymm1); /* ctr: +5:+4 */ + vpshufb %ymm2, %ymm6, %ymm0; + vmovdqa %ymm0, (2 * 32)(%esp); + add2_le128(%ymm6, %ymm4, %ymm7, %ymm5, %ymm1); /* ctr: +7:+6 */ + vpshufb %ymm2, %ymm6, %ymm3; + add2_le128(%ymm6, %ymm4, %ymm7, %ymm5, %ymm1); /* ctr: +9:+8 */ + vpshufb %ymm2, %ymm6, %ymm5; + add2_le128(%ymm6, %ymm4, %ymm7, %ymm2, %ymm1); /* ctr: +11:+10 */ + vmovdqa (0 * 32)(%esp), %ymm0; + vmovdqa (1 * 32)(%esp), %ymm1; + vmovdqa (2 * 32)(%esp), %ymm2; + vpshufb CADDR(.Lbswap128_mask, %ebx), %ymm6, %ymm6; + + jmp .Lctr_enc_blk12_rounds; + + /* Handle trailing four blocks. */ +.align 8 +.Lctr_enc_blk4: + cmpl $4, 4+20(%ebp); + jb .Lctr_enc_blk1; + + subl $4, 4+20(%ebp); + + vbroadcasti128 (%esi), %ymm3; + + /* detect if carry handling is needed */ + movl 12(%esi), %eax; + addl $(4 << 24), %eax; + jc .Lctr_enc_blk4_handle_carry; + movl %eax, 12(%esi); + + .Lctr_enc_blk4_byte_bige_add: + /* Increment counters. 
*/ + vpaddb CADDR(.Lbige_addb_0, %ebx), %ymm3, %ymm0; + vpaddb CADDR(.Lbige_addb_2, %ebx), %ymm3, %ymm1; + + .Lctr_enc_blk4_rounds: + /* AES rounds */ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + XOR2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 4+24(%ebp); + jb .Lctr_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lctr_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. */ + .Lctr_enc_blk4_last: + vpxor (0 * 16)(%ecx), %ymm4, %ymm5; /* Xor src to last round key. */ + vpxor (2 * 16)(%ecx), %ymm4, %ymm6; + leal (4 * 16)(%ecx), %ecx; + vaesenclast %ymm5, %ymm0, %ymm0; + vaesenclast %ymm6, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + + jmp .Lctr_enc_blk1; + + .align 8 + .Lctr_enc_blk4_handle_only_ctr_carry: + handle_ctr_128bit_add(4); + jmp .Lctr_enc_blk4_byte_bige_add; + + .align 8 + .Lctr_enc_blk4_handle_carry: + jz .Lctr_enc_blk4_handle_only_ctr_carry; + /* Increment counters (handle carry). 
*/ + prepare_ctr_const(%ymm4, %ymm7); + vpshufb CADDR(.Lbswap128_mask, %ebx), %xmm3, %xmm1; /* be => le */ + vmovdqa %xmm1, %xmm0; + inc_le128(%xmm1, %xmm4, %xmm5); + vinserti128 $1, %xmm1, %ymm0, %ymm3; /* ctr: +1:+0 */ + vpshufb CADDR(.Lbswap128_mask, %ebx), %ymm3, %ymm0; + handle_ctr_128bit_add(4); + add2_le128(%ymm3, %ymm4, %ymm7, %ymm5, %ymm6); /* ctr: +3:+2 */ + vpshufb CADDR(.Lbswap128_mask, %ebx), %ymm3, %ymm1; + + jmp .Lctr_enc_blk4_rounds; + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Lctr_enc_blk1: + cmpl $1, 4+20(%ebp); + jb .Ldone_ctr_enc; + + subl $1, 4+20(%ebp); + + /* Load and increament counter. */ + vmovdqu (%esi), %xmm0; + handle_ctr_128bit_add(1); + + /* AES rounds. */ + vpxor (0 * 16)(%edi), %xmm0, %xmm0; + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 4+24(%ebp); + jb .Lctr_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lctr_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + + /* Last round and output handling. */ + .Lctr_enc_blk1_last: + vpxor (%ecx), %xmm1, %xmm1; /* Xor src to last round key. */ + leal 16(%ecx), %ecx; + vaesenclast %xmm1, %xmm0, %xmm0; /* Last round and xor with xmm1. 
*/ + vmovdqu %xmm0, (%edx); + leal 16(%edx), %edx; + + jmp .Lctr_enc_blk1; + +.align 8 +.Ldone_ctr_enc: + vpxor %ymm0, %ymm0, %ymm0; + movl (3 * 32 + 0 * 4)(%esp), %edi; + CFI_RESTORE(edi); + movl (3 * 32 + 1 * 4)(%esp), %esi; + CFI_RESTORE(esi); + movl (3 * 32 + 2 * 4)(%esp), %ebx; + CFI_RESTORE(ebx); + vmovdqa %ymm0, (0 * 32)(%esp); + vmovdqa %ymm0, (1 * 32)(%esp); + vmovdqa %ymm0, (2 * 32)(%esp); + leave; + CFI_LEAVE(); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_ctr_enc_i386), + .-SYM_NAME(_gcry_vaes_avx2_ctr_enc_i386)) + +/********************************************************************** + Little-endian 32-bit CTR-mode encryption (GCM-SIV) + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_ctr32le_enc_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_ctr32le_enc_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_ctr32le_enc_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): counter + * (esp + 12): dst + * (esp + 16): src + * (esp + 20): nblocks + * (esp + 24): nrounds + */ + CFI_STARTPROC(); + + GET_DATA_POINTER(SYM_NAME(_gcry_vaes_consts), eax); + + pushl %ebp; + CFI_PUSH(%ebp); + movl %esp, %ebp; + CFI_DEF_CFA_REGISTER(%ebp); + + subl $(3 * 4), %esp; + + movl %edi, (0 * 4)(%esp); + CFI_REG_ON_STACK(edi, 0 * 4); + movl %esi, (1 * 4)(%esp); + CFI_REG_ON_STACK(esi, 1 * 4); + movl %ebx, (2 * 4)(%esp); + CFI_REG_ON_STACK(ebx, 2 * 4); + + movl %eax, %ebx; + movl 4+4(%ebp), %edi; + movl 4+8(%ebp), %esi; + movl 4+12(%ebp), %edx; + movl 4+16(%ebp), %ecx; + movl 4+20(%ebp), %eax; + + vbroadcasti128 (%esi), %ymm7; /* Load CTR. */ + + /* Process 12 blocks per loop. */ +.align 8 +.Lctr32le_enc_blk12: + cmpl $12, %eax; + jb .Lctr32le_enc_blk4; + + leal -12(%eax), %eax; + + vbroadcasti128 (0 * 16)(%edi), %ymm4; + + /* Increment counters. 
*/ + vpaddd CADDR(.Lle_addd_0, %ebx), %ymm7, %ymm0; + vpaddd CADDR(.Lle_addd_2, %ebx), %ymm7, %ymm1; + vpaddd CADDR(.Lle_addd_4, %ebx), %ymm7, %ymm2; + vpaddd CADDR(.Lle_addd_6, %ebx), %ymm7, %ymm3; + vpaddd CADDR(.Lle_addd_8, %ebx), %ymm7, %ymm5; + vpaddd CADDR(.Lle_addd_10, %ebx), %ymm7, %ymm6; + + vpaddd CADDR(.Lle_addd_12_2, %ebx), %ymm7, %ymm7; + vmovdqu %xmm7, (%esi); /* Store CTR. */ + + /* AES rounds */ + XOR6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 4+24(%ebp); + jb .Lctr32le_enc_blk8_last; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lctr32le_enc_blk8_last; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC6(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3, %ymm5, %ymm6); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. 
*/ + .Lctr32le_enc_blk8_last: + vpxor (0 * 16)(%ecx), %ymm4, %ymm7; /* Xor src to last round key. */ + vaesenclast %ymm7, %ymm0, %ymm0; + vpxor (2 * 16)(%ecx), %ymm4, %ymm7; + vaesenclast %ymm7, %ymm1, %ymm1; + vpxor (4 * 16)(%ecx), %ymm4, %ymm7; + vaesenclast %ymm7, %ymm2, %ymm2; + vpxor (6 * 16)(%ecx), %ymm4, %ymm7; + vaesenclast %ymm7, %ymm3, %ymm3; + vpxor (8 * 16)(%ecx), %ymm4, %ymm7; + vpxor (10 * 16)(%ecx), %ymm4, %ymm4; + vaesenclast %ymm7, %ymm5, %ymm5; + vbroadcasti128 (%esi), %ymm7; /* Reload CTR. */ + vaesenclast %ymm4, %ymm6, %ymm6; + leal (12 * 16)(%ecx), %ecx; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + vmovdqu %ymm5, (8 * 16)(%edx); + vmovdqu %ymm6, (10 * 16)(%edx); + leal (12 * 16)(%edx), %edx; + + jmp .Lctr32le_enc_blk12; + + /* Handle trailing four blocks. */ +.align 8 +.Lctr32le_enc_blk4: + cmpl $4, %eax; + jb .Lctr32le_enc_blk1; + + leal -4(%eax), %eax; + + vbroadcasti128 (0 * 16)(%edi), %ymm4; + + /* Increment counters. 
*/ + vpaddd CADDR(.Lle_addd_0, %ebx), %ymm7, %ymm0; + vpaddd CADDR(.Lle_addd_2, %ebx), %ymm7, %ymm1; + + vpaddd CADDR(.Lle_addd_4_2, %ebx), %ymm7, %ymm7; + + /* AES rounds */ + XOR2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (1 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 4+24(%ebp); + jb .Lctr32le_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lctr32le_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + + /* Last round and output handling. */ + .Lctr32le_enc_blk4_last: + vpxor (0 * 16)(%ecx), %ymm4, %ymm5; /* Xor src to last round key. */ + vpxor (2 * 16)(%ecx), %ymm4, %ymm6; + leal (4 * 16)(%ecx), %ecx; + vaesenclast %ymm5, %ymm0, %ymm0; + vaesenclast %ymm6, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Lctr32le_enc_blk1: + cmpl $1, %eax; + jb .Ldone_ctr32le_enc; + + leal -1(%eax), %eax; + + /* Load and increament counter. */ + vmovdqu %xmm7, %xmm0; + vpaddd CADDR(.Lle_addd_1, %ebx), %xmm7, %xmm7; + + /* AES rounds. 
*/ + vpxor (0 * 16)(%edi), %xmm0, %xmm0; + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 4+24(%ebp); + jb .Lctr32le_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lctr32le_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + + /* Last round and output handling. */ + .Lctr32le_enc_blk1_last: + vpxor (%ecx), %xmm1, %xmm1; /* Xor src to last round key. */ + leal 16(%ecx), %ecx; + vaesenclast %xmm1, %xmm0, %xmm0; /* Last round and xor with xmm1. */ + vmovdqu %xmm0, (%edx); + leal 16(%edx), %edx; + + jmp .Lctr32le_enc_blk1; + +.align 8 +.Ldone_ctr32le_enc: + vmovdqu %xmm7, (%esi); /* Store CTR. 
*/ + movl (0 * 4)(%esp), %edi; + CFI_RESTORE(edi); + movl (1 * 4)(%esp), %esi; + CFI_RESTORE(esi); + movl (2 * 4)(%esp), %ebx; + CFI_RESTORE(ebx); + leave; + CFI_LEAVE(); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_ctr32le_enc_i386), + .-SYM_NAME(_gcry_vaes_avx2_ctr32le_enc_i386)) + +/********************************************************************** + OCB-mode encryption/decryption/authentication + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_ocb_crypt_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_ocb_crypt_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_ocb_crypt_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): dst + * (esp + 12): src + * (esp + 16): nblocks + * (esp + 20): nrounds + * (esp + 24): offset + * (esp + 28): checksum + * (esp + 32): blkn + * (esp + 36): L table + * (esp + 44): encrypt/decrypt/auth mode + */ + CFI_STARTPROC(); + + pushl %ebp; + CFI_PUSH(%ebp); + movl %esp, %ebp; + CFI_DEF_CFA_REGISTER(%ebp); + +#define STACK_VEC_POS 0 +#define STACK_TMP_Y0 (STACK_VEC_POS + 0 * 32) +#define STACK_TMP_Y1 (STACK_VEC_POS + 1 * 32) +#define STACK_TMP_Y2 (STACK_VEC_POS + 2 * 32) +#define STACK_TMP_Y3 (STACK_VEC_POS + 3 * 32) +#define STACK_TMP_Y4 (STACK_VEC_POS + 4 * 32) +#define STACK_TMP_Y5 (STACK_VEC_POS + 5 * 32) +#define STACK_FXL_KEY (STACK_VEC_POS + 6 * 32) +#define STACK_OFFSET_AND_F_KEY (STACK_VEC_POS + 7 * 32) +#define STACK_CHECKSUM (STACK_VEC_POS + 8 * 32) +#define STACK_GPR_POS (9 * 32) +#define STACK_END_POS (STACK_GPR_POS + 3 * 4) + + subl $STACK_END_POS, %esp; + andl $-32, %esp; + + movl %edi, (STACK_GPR_POS + 0 * 4)(%esp); + CFI_REG_ON_STACK(edi, STACK_GPR_POS + 0 * 4); + movl %esi, (STACK_GPR_POS + 1 * 4)(%esp); + CFI_REG_ON_STACK(esi, STACK_GPR_POS + 1 * 4); + movl %ebx, (STACK_GPR_POS + 2 * 4)(%esp); + CFI_REG_ON_STACK(ebx, STACK_GPR_POS + 2 * 4); + + movl 4+4(%ebp), %edi; + movl 4+8(%ebp), %esi; + movl 4+12(%ebp), %edx; + 
movl 4+32(%ebp), %ebx; + + movl 4+24(%ebp), %eax; + movl 4+20(%ebp), %ecx; + leal (, %ecx, 4), %ecx; + vmovdqu (%eax), %xmm1; /* offset */ + vmovdqa (%edi), %xmm0; /* first key */ + vpxor %xmm0, %xmm1, %xmm1; /* offset ^ first key */ + vpxor (%edi, %ecx, 4), %xmm0, %xmm0; /* first key ^ last key */ + vinserti128 $1, %xmm0, %ymm0, %ymm0; + vpxor %ymm2, %ymm2, %ymm2; + vmovdqa %xmm1, (STACK_OFFSET_AND_F_KEY)(%esp); + vmovdqa %ymm2, (STACK_CHECKSUM)(%esp); + vmovdqa %ymm0, (STACK_FXL_KEY)(%esp); + + cmpl $12, 4+16(%ebp); + jae .Locb_crypt_blk12_loop; + jmp .Locb_crypt_blk4; + + /* Process 12 blocks per loop. */ +.align 16 +.Locb_crypt_blk12_loop: + subl $12, 4+16(%ebp); + + movl 4+36(%ebp), %ecx; + vmovdqa (%ecx), %xmm7; /* Preload L[0] */ + + testl $1, %ebx; + jz .Locb_crypt_blk12_nblk_even; + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + leal 1(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+1) + shll $4, %eax; + vmovdqa (STACK_OFFSET_AND_F_KEY)(%esp), %xmm1; + vpxor (%ecx, %eax), %xmm1, %xmm1; + + vpxor %xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm1; + vmovdqa %ymm1, (STACK_TMP_Y0)(%esp); + + leal 3(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+3) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm0, %xmm1; + + vpxor %xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm2; + + leal 5(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+5) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm0, %xmm1; + + vpxor %xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm3; + + leal 7(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+7) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm0, %xmm1; + + vpxor %xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm4; + + leal 9(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+9) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm0, %xmm1; + + vpxor %xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm5; + + leal 11(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+11) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm0, %xmm1; + + leal 12(%ebx), %ebx; + vpxor 
%xmm7, %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm6; + + cmpl $1, 4+40(%ebp); + jb .Locb_dec_blk12; + ja .Locb_auth_blk12; + jmp .Locb_enc_blk12; + + .align 8 + .Locb_crypt_blk12_nblk_even: + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + vpxor (STACK_OFFSET_AND_F_KEY)(%esp), %xmm7, %xmm1; + + leal 2(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+2) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm1; + vmovdqa %ymm1, (STACK_TMP_Y0)(%esp); + + vpxor %xmm7, %xmm0, %xmm1; + + leal 4(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+4) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm2; + + vpxor %xmm7, %xmm0, %xmm1; + + leal 6(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+6) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm3; + + vpxor %xmm7, %xmm0, %xmm1; + + leal 8(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+8) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm4; + + vpxor %xmm7, %xmm0, %xmm1; + + leal 10(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+10) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm5; + + vpxor %xmm7, %xmm0, %xmm1; + + leal 12(%ebx), %ebx; + tzcntl %ebx, %eax; // ntz(blkn+12) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm0; + vinserti128 $1, %xmm0, %ymm1, %ymm6; + + cmpl $1, 4+40(%ebp); + jb .Locb_dec_blk12; + ja .Locb_auth_blk12; + + .align 8 + .Locb_enc_blk12: + vmovdqa %ymm2, (STACK_TMP_Y1)(%esp); + vmovdqa %ymm3, (STACK_TMP_Y2)(%esp); + vmovdqa %ymm4, (STACK_TMP_Y3)(%esp); + vmovdqa %ymm5, (STACK_TMP_Y4)(%esp); + vmovdqa %ymm6, (STACK_TMP_Y5)(%esp); + vmovdqa %xmm0, (STACK_OFFSET_AND_F_KEY)(%esp); + + vmovdqu 0*16(%edx), %ymm1; + vmovdqu 2*16(%edx), %ymm2; + vmovdqu 4*16(%edx), %ymm3; + vmovdqu 6*16(%edx), %ymm4; + vmovdqu 8*16(%edx), %ymm5; + vmovdqu 10*16(%edx), %ymm6; + leal 12*16(%edx), %edx; + + /* Checksum_i = Checksum_{i-1} xor P_i 
*/ + vpxor %ymm1, %ymm2, %ymm0; + vpxor %ymm3, %ymm4, %ymm7; + vpxor %ymm5, %ymm0, %ymm0; + vpxor %ymm6, %ymm7, %ymm7; + vpxor %ymm0, %ymm7, %ymm7; + vbroadcasti128 (1 * 16)(%edi), %ymm0; + vpxor (STACK_CHECKSUM)(%esp), %ymm7, %ymm7; + + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + vpxor (STACK_TMP_Y0)(%esp), %ymm1, %ymm1; + vpxor (STACK_TMP_Y1)(%esp), %ymm2, %ymm2; + vpxor (STACK_TMP_Y2)(%esp), %ymm3, %ymm3; + vpxor (STACK_TMP_Y3)(%esp), %ymm4, %ymm4; + vpxor (STACK_TMP_Y4)(%esp), %ymm5, %ymm5; + vpxor (STACK_TMP_Y5)(%esp), %ymm6, %ymm6; + + vmovdqa %ymm7, (STACK_CHECKSUM)(%esp); + + /* AES rounds */ + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + cmpl $12, 4+20(%ebp); + jb .Locb_enc_blk12_last; + vbroadcasti128 (10 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + jz .Locb_enc_blk12_last; + vbroadcasti128 (12 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + + /* Last round and 
output handling. */ + .Locb_enc_blk12_last: + vmovdqa (STACK_FXL_KEY)(%esp), %ymm0; + vpxor (STACK_TMP_Y0)(%esp), %ymm0, %ymm7; + vaesenclast %ymm7, %ymm1, %ymm1; + vpxor (STACK_TMP_Y1)(%esp), %ymm0, %ymm7; + vmovdqu %ymm1, 0*16(%esi); + vpxor (STACK_TMP_Y2)(%esp), %ymm0, %ymm1; + vaesenclast %ymm7, %ymm2, %ymm2; + vpxor (STACK_TMP_Y3)(%esp), %ymm0, %ymm7; + vaesenclast %ymm1, %ymm3, %ymm3; + vpxor (STACK_TMP_Y4)(%esp), %ymm0, %ymm1; + vaesenclast %ymm7, %ymm4, %ymm4; + vpxor (STACK_TMP_Y5)(%esp), %ymm0, %ymm7; + vaesenclast %ymm1, %ymm5, %ymm5; + vaesenclast %ymm7, %ymm6, %ymm6; + vmovdqu %ymm2, 2*16(%esi); + vmovdqu %ymm3, 4*16(%esi); + vmovdqu %ymm4, 6*16(%esi); + vmovdqu %ymm5, 8*16(%esi); + vmovdqu %ymm6, 10*16(%esi); + leal 12*16(%esi), %esi; + + cmpl $12, 4+16(%ebp); + jae .Locb_crypt_blk12_loop; + jmp .Locb_crypt_blk12_cleanup; + + .align 8 + .Locb_auth_blk12: + vmovdqa %xmm0, (STACK_OFFSET_AND_F_KEY)(%esp); + vbroadcasti128 (1 * 16)(%edi), %ymm0; + + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + vmovdqa (STACK_TMP_Y0)(%esp), %ymm1; + vpxor 0*16(%edx), %ymm1, %ymm1; + vpxor 2*16(%edx), %ymm2, %ymm2; + vpxor 4*16(%edx), %ymm3, %ymm3; + vpxor 6*16(%edx), %ymm4, %ymm4; + vpxor 8*16(%edx), %ymm5, %ymm5; + vpxor 10*16(%edx), %ymm6, %ymm6; + leal 12*16(%edx), %edx; + + /* AES rounds */ + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + 
vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (10 * 16)(%edi), %ymm0; + cmpl $12, 4+20(%ebp); + jb .Locb_auth_blk12_last; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (12 * 16)(%edi), %ymm0; + jz .Locb_auth_blk12_last; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESENC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (14 * 16)(%edi), %ymm0; + + /* Last round and output handling. */ + .Locb_auth_blk12_last: + vaesenclast %ymm0, %ymm1, %ymm1; + vaesenclast %ymm0, %ymm2, %ymm2; + vaesenclast %ymm0, %ymm3, %ymm3; + vaesenclast %ymm0, %ymm4, %ymm4; + vaesenclast %ymm0, %ymm5, %ymm5; + vaesenclast %ymm0, %ymm6, %ymm6; + + vpxor %ymm1, %ymm2, %ymm0; + vpxor %ymm3, %ymm4, %ymm4; + vpxor %ymm5, %ymm0, %ymm0; + vpxor %ymm6, %ymm4, %ymm4; + vpxor %ymm0, %ymm4, %ymm4; + vpxor (STACK_CHECKSUM)(%esp), %ymm4, %ymm4; + vmovdqa %ymm4, (STACK_CHECKSUM)(%esp); + + cmpl $12, 4+16(%ebp); + jae .Locb_crypt_blk12_loop; + jmp .Locb_crypt_blk12_cleanup; + + .align 8 + .Locb_dec_blk12: + vmovdqa %xmm0, (STACK_OFFSET_AND_F_KEY)(%esp); + + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + vmovdqa (STACK_TMP_Y0)(%esp), %ymm1; + vmovdqu 0*16(%edx), %ymm0; + vmovdqu 2*16(%edx), %ymm7; + vpxor %ymm0, %ymm1, %ymm1; + vmovdqa %ymm2, (STACK_TMP_Y1)(%esp); + vpxor %ymm7, %ymm2, %ymm2; + vmovdqu 4*16(%edx), %ymm0; + vmovdqu 6*16(%edx), %ymm7; + vmovdqa %ymm3, (STACK_TMP_Y2)(%esp); + vmovdqa %ymm4, (STACK_TMP_Y3)(%esp); + vpxor %ymm0, %ymm3, %ymm3; + vpxor %ymm7, %ymm4, %ymm4; + vmovdqu 8*16(%edx), %ymm0; + vmovdqu 10*16(%edx), %ymm7; + leal 12*16(%edx), %edx; + vmovdqa %ymm5, (STACK_TMP_Y4)(%esp); + vmovdqa %ymm6, 
(STACK_TMP_Y5)(%esp); + vpxor %ymm0, %ymm5, %ymm5; + vbroadcasti128 (1 * 16)(%edi), %ymm0; + vpxor %ymm7, %ymm6, %ymm6; + + /* AES rounds */ + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + cmpl $12, 4+20(%ebp); + jb .Locb_dec_blk12_last; + vbroadcasti128 (10 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + jz .Locb_dec_blk12_last; + vbroadcasti128 (12 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESDEC6(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6); + + /* Last round and output handling. 
*/ + .Locb_dec_blk12_last: + vmovdqa (STACK_FXL_KEY)(%esp), %ymm0; + vpxor (STACK_TMP_Y0)(%esp), %ymm0, %ymm7; + vaesdeclast %ymm7, %ymm1, %ymm1; + vmovdqu %ymm1, 0*16(%esi); + vpxor (STACK_TMP_Y1)(%esp), %ymm0, %ymm1; + vpxor (STACK_TMP_Y2)(%esp), %ymm0, %ymm7; + vaesdeclast %ymm1, %ymm2, %ymm2; + vpxor (STACK_TMP_Y3)(%esp), %ymm0, %ymm1; + vaesdeclast %ymm7, %ymm3, %ymm3; + vpxor (STACK_TMP_Y4)(%esp), %ymm0, %ymm7; + vaesdeclast %ymm1, %ymm4, %ymm4; + vpxor (STACK_TMP_Y5)(%esp), %ymm0, %ymm0; + vaesdeclast %ymm7, %ymm5, %ymm5; + vaesdeclast %ymm0, %ymm6, %ymm6; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor %ymm2, %ymm3, %ymm0; + vpxor %ymm4, %ymm5, %ymm7; + vpxor %ymm6, %ymm0, %ymm0; + vpxor 0*16(%esi), %ymm7, %ymm7; + vpxor %ymm0, %ymm7, %ymm7; + vpxor (STACK_CHECKSUM)(%esp), %ymm7, %ymm7; + + vmovdqu %ymm2, 2*16(%esi); + vmovdqu %ymm3, 4*16(%esi); + vmovdqu %ymm4, 6*16(%esi); + vmovdqu %ymm5, 8*16(%esi); + vmovdqu %ymm6, 10*16(%esi); + leal 12*16(%esi), %esi; + + vmovdqa %ymm7, (STACK_CHECKSUM)(%esp); + + cmpl $12, 4+16(%ebp); + jae .Locb_crypt_blk12_loop; + +.align 8 +.Locb_crypt_blk12_cleanup: + vpxor %ymm0, %ymm0, %ymm0; + vmovdqa %ymm0, (STACK_TMP_Y0)(%esp); + vmovdqa %ymm0, (STACK_TMP_Y1)(%esp); + vmovdqa %ymm0, (STACK_TMP_Y2)(%esp); + vmovdqa %ymm0, (STACK_TMP_Y3)(%esp); + vmovdqa %ymm0, (STACK_TMP_Y4)(%esp); + vmovdqa %ymm0, (STACK_TMP_Y5)(%esp); + + /* Process trailing four blocks. 
*/ +.align 8 +.Locb_crypt_blk4: + cmpl $4, 4+16(%ebp); + jb .Locb_crypt_blk1; + + subl $4, 4+16(%ebp); + + movl 4+36(%ebp), %ecx; + vmovdqa (%ecx), %xmm7; /* Preload L[0] */ + + testl $1, %ebx; + jz .Locb_crypt_blk4_nblk_even; + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + leal 1(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+1) + shll $4, %eax; + vmovdqa (STACK_OFFSET_AND_F_KEY)(%esp), %xmm1; + vpxor (%ecx, %eax), %xmm1, %xmm1; + + vpxor %xmm7, %xmm1, %xmm2; + vinserti128 $1, %xmm2, %ymm1, %ymm6; + + leal 3(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+3) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm2, %xmm3; + + leal 4(%ebx), %ebx; + vpxor %xmm7, %xmm3, %xmm4; + vinserti128 $1, %xmm4, %ymm3, %ymm7; + vmovdqa %xmm4, (STACK_OFFSET_AND_F_KEY)(%esp); + + cmpl $1, 4+40(%ebp); + jb .Locb_dec_blk4; + ja .Locb_auth_blk4; + jmp .Locb_enc_blk4; + + .align 8 + .Locb_crypt_blk4_nblk_even: + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + vmovdqa (STACK_OFFSET_AND_F_KEY)(%esp), %xmm1; + vpxor %xmm7, %xmm1, %xmm1; + + leal 2(%ebx), %eax; + tzcntl %eax, %eax; // ntz(blkn+2) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm1, %xmm2; + vinserti128 $1, %xmm2, %ymm1, %ymm6; + + vpxor %xmm7, %xmm2, %xmm3; + + leal 4(%ebx), %ebx; + tzcntl %ebx, %eax; // ntz(blkn+4) + shll $4, %eax; + vpxor (%ecx, %eax), %xmm3, %xmm4; + vinserti128 $1, %xmm4, %ymm3, %ymm7; + vmovdqa %xmm4, (STACK_OFFSET_AND_F_KEY)(%esp); + + cmpl $1, 4+40(%ebp); + jb .Locb_dec_blk4; + ja .Locb_auth_blk4; + + .align 8 + .Locb_enc_blk4: + vmovdqu 0*16(%edx), %ymm1; + vmovdqu 2*16(%edx), %ymm2; + leal 4*16(%edx), %edx; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor %ymm1, %ymm2, %ymm5; + vpxor (STACK_CHECKSUM)(%esp), %ymm5, %ymm5; + vmovdqa %ymm5, (STACK_CHECKSUM)(%esp); + + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + vpxor %ymm6, %ymm1, %ymm1; + vpxor %ymm7, %ymm2, %ymm2; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + 
VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + cmpl $12, 4+20(%ebp); + jb .Locb_enc_blk4_last; + vbroadcasti128 (10 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + jz .Locb_enc_blk4_last; + vbroadcasti128 (12 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + + /* Last round and output handling. */ + .Locb_enc_blk4_last: + vmovdqa (STACK_FXL_KEY)(%esp), %ymm0; + vpxor %ymm0, %ymm6, %ymm6; /* Xor offset to last round key. 
*/ + vpxor %ymm0, %ymm7, %ymm7; + vaesenclast %ymm6, %ymm1, %ymm1; + vaesenclast %ymm7, %ymm2, %ymm2; + vmovdqu %ymm1, 0*16(%esi); + vmovdqu %ymm2, 2*16(%esi); + leal 4*16(%esi), %esi; + + jmp .Locb_crypt_blk1; + + .align 8 + .Locb_auth_blk4: + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + vpxor 0*16(%edx), %ymm6, %ymm1; + vpxor 2*16(%edx), %ymm7, %ymm2; + leal 4*16(%edx), %edx; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (10 * 16)(%edi), %ymm0; + cmpl $12, 4+20(%ebp); + jb .Locb_auth_blk4_last; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (12 * 16)(%edi), %ymm0; + jz .Locb_auth_blk4_last; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESENC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (14 * 16)(%edi), %ymm0; + + /* Last round and output handling. 
*/ + .Locb_auth_blk4_last: + vaesenclast %ymm0, %ymm1, %ymm1; + vaesenclast %ymm0, %ymm2, %ymm2; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor %ymm1, %ymm2, %ymm5; + vpxor (STACK_CHECKSUM)(%esp), %ymm5, %ymm5; + vmovdqa %ymm5, (STACK_CHECKSUM)(%esp); + + jmp .Locb_crypt_blk1; + + .align 8 + .Locb_dec_blk4: + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + vpxor 0*16(%edx), %ymm6, %ymm1; + vpxor 2*16(%edx), %ymm7, %ymm2; + leal 4*16(%edx), %edx; + + /* AES rounds */ + vbroadcasti128 (1 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (2 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (3 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (4 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (5 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (6 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (7 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (8 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (9 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + cmpl $12, 4+20(%ebp); + jb .Locb_dec_blk4_last; + vbroadcasti128 (10 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (11 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + jz .Locb_dec_blk4_last; + vbroadcasti128 (12 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + vbroadcasti128 (13 * 16)(%edi), %ymm0; + VAESDEC2(%ymm0, %ymm1, %ymm2); + + /* Last round and output handling. */ + .Locb_dec_blk4_last: + vmovdqa (STACK_FXL_KEY)(%esp), %ymm0; + vpxor %ymm0, %ymm6, %ymm6; /* Xor offset to last round key. 
*/ + vpxor %ymm0, %ymm7, %ymm7; + vaesdeclast %ymm6, %ymm1, %ymm1; + vaesdeclast %ymm7, %ymm2, %ymm2; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor %ymm1, %ymm2, %ymm5; + vpxor (STACK_CHECKSUM)(%esp), %ymm5, %ymm5; + + vmovdqu %ymm1, 0*16(%esi); + vmovdqu %ymm2, 2*16(%esi); + leal 4*16(%esi), %esi; + + vmovdqa %ymm5, (STACK_CHECKSUM)(%esp); + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Locb_crypt_blk1: + cmpl $1, 4+16(%ebp); + jb .Locb_crypt_done; + + subl $1, 4+16(%ebp); + + movl 4+36(%ebp), %ecx; + leal 1(%ebx), %ebx; + tzcntl %ebx, %eax; // ntz(blkn+1) + shll $4, %eax; + vmovdqa (STACK_OFFSET_AND_F_KEY)(%esp), %xmm7; + vpxor (%ecx, %eax), %xmm7, %xmm7; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + vmovdqa %xmm7, (STACK_OFFSET_AND_F_KEY)(%esp); + + cmpl $1, 4+40(%ebp); + jb .Locb_dec_blk1; + ja .Locb_auth_blk1; + vmovdqu (%edx), %xmm0; + leal 16(%edx), %edx; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor (STACK_CHECKSUM)(%esp), %xmm0, %xmm1; + vmovdqa %xmm1, (STACK_CHECKSUM)(%esp); + + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + vpxor %xmm7, %xmm0, %xmm0; + + /* AES rounds. */ + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + cmpl $12, 4+20(%ebp); + jb .Locb_enc_blk1_last; + vaesenc (10 * 16)(%edi), %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + jz .Locb_enc_blk1_last; + vaesenc (12 * 16)(%edi), %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + + /* Last round and output handling. 
*/ + .Locb_enc_blk1_last: + vpxor (STACK_FXL_KEY)(%esp), %xmm7, %xmm1; + vaesenclast %xmm1, %xmm0, %xmm0; + vmovdqu %xmm0, (%esi); + leal 16(%esi), %esi; + + jmp .Locb_crypt_blk1; + + .align 8 + .Locb_auth_blk1: + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + vpxor (%edx), %xmm7, %xmm0; + leal 16(%edx), %edx; + + /* AES rounds. */ + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 4+20(%ebp); + jb .Locb_auth_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Locb_auth_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + + /* Last round and output handling. */ + .Locb_auth_blk1_last: + vpxor (STACK_CHECKSUM)(%esp), %xmm1, %xmm1; + vaesenclast %xmm1, %xmm0, %xmm0; + vmovdqa %xmm0, (STACK_CHECKSUM)(%esp); + + jmp .Locb_crypt_blk1; + + .align 8 + .Locb_dec_blk1: + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + vpxor (%edx), %xmm7, %xmm0; + leal 16(%edx), %edx; + + /* AES rounds. 
*/ + vaesdec (1 * 16)(%edi), %xmm0, %xmm0; + vaesdec (2 * 16)(%edi), %xmm0, %xmm0; + vaesdec (3 * 16)(%edi), %xmm0, %xmm0; + vaesdec (4 * 16)(%edi), %xmm0, %xmm0; + vaesdec (5 * 16)(%edi), %xmm0, %xmm0; + vaesdec (6 * 16)(%edi), %xmm0, %xmm0; + vaesdec (7 * 16)(%edi), %xmm0, %xmm0; + vaesdec (8 * 16)(%edi), %xmm0, %xmm0; + vaesdec (9 * 16)(%edi), %xmm0, %xmm0; + cmpl $12, 4+20(%ebp); + jb .Locb_dec_blk1_last; + vaesdec (10 * 16)(%edi), %xmm0, %xmm0; + vaesdec (11 * 16)(%edi), %xmm0, %xmm0; + jz .Locb_dec_blk1_last; + vaesdec (12 * 16)(%edi), %xmm0, %xmm0; + vaesdec (13 * 16)(%edi), %xmm0, %xmm0; + + /* Last round and output handling. */ + .Locb_dec_blk1_last: + vpxor (STACK_FXL_KEY)(%esp), %xmm7, %xmm1; + vaesdeclast %xmm1, %xmm0, %xmm0; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vpxor (STACK_CHECKSUM)(%esp), %xmm0, %xmm1; + + vmovdqu %xmm0, (%esi); + leal 16(%esi), %esi; + + vmovdqa %xmm1, (STACK_CHECKSUM)(%esp); + + jmp .Locb_crypt_blk1; + +.align 8 +.Locb_crypt_done: + movl 4+24(%ebp), %ecx; + vmovdqa (STACK_OFFSET_AND_F_KEY)(%esp), %xmm1; + vpxor (%edi), %xmm1, %xmm1; + vmovdqu %xmm1, (%ecx); + + movl 4+28(%ebp), %eax; + vmovdqa (STACK_CHECKSUM)(%esp), %xmm2; + vpxor (STACK_CHECKSUM + 16)(%esp), %xmm2, %xmm2; + vpxor (%eax), %xmm2, %xmm2; + vmovdqu %xmm2, (%eax); + + movl (STACK_GPR_POS + 0 * 4)(%esp), %edi; + CFI_RESTORE(edi); + movl (STACK_GPR_POS + 1 * 4)(%esp), %esi; + CFI_RESTORE(esi); + movl (STACK_GPR_POS + 2 * 4)(%esp), %ebx; + CFI_RESTORE(ebx); + + vpxor %ymm0, %ymm0, %ymm0; + vmovdqa %ymm0, (STACK_OFFSET_AND_F_KEY)(%esp); + vmovdqa %ymm0, (STACK_CHECKSUM)(%esp); + + xorl %eax, %eax; + leave; + CFI_LEAVE(); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_ocb_crypt_i386), + .-SYM_NAME(_gcry_vaes_avx2_ocb_crypt_i386)) + +/********************************************************************** + XTS-mode encryption + **********************************************************************/ +ELF(.type 
SYM_NAME(_gcry_vaes_avx2_xts_crypt_i386),@function)
.globl SYM_NAME(_gcry_vaes_avx2_xts_crypt_i386)
.align 16
SYM_NAME(_gcry_vaes_avx2_xts_crypt_i386):
	/* XTS bulk en-/decryption (i386, VAES/AVX2).  Processes 8, then 4,
	 * then 1 block(s) per loop pass, advancing the tweak as it goes and
	 * storing the final tweak back to the caller's buffer on exit.
	 *
	 * input:
	 *	(esp + 4): round keys
	 *	(esp + 8): tweak
	 *	(esp + 12): dst
	 *	(esp + 16): src
	 *	(esp + 20): nblocks
	 *	(esp + 24): nrounds
	 *	(esp + 28): encrypt
	 */
	CFI_STARTPROC();

	GET_DATA_POINTER(SYM_NAME(_gcry_vaes_consts), eax);

	pushl %ebp;
	CFI_PUSH(%ebp);
	movl %esp, %ebp;
	CFI_DEF_CFA_REGISTER(%ebp);

	/* Reserve four 32-byte tweak spill slots plus three 4-byte GPR save
	 * slots, then align %esp to 32 so vmovdqa can be used on the slots. */
	subl $(4 * 32 + 3 * 4), %esp;
	andl $-32, %esp;

	movl %edi, (4 * 32 + 0 * 4)(%esp);
	CFI_REG_ON_STACK(edi, 4 * 32 + 0 * 4);
	movl %esi, (4 * 32 + 1 * 4)(%esp);
	CFI_REG_ON_STACK(esi, 4 * 32 + 1 * 4);
	movl %ebx, (4 * 32 + 2 * 4)(%esp);
	CFI_REG_ON_STACK(ebx, 4 * 32 + 2 * 4);

	/* ebx = constants base, edi = round keys, esi = tweak buffer,
	 * edx = dst, ecx = src, eax = remaining block count. */
	movl %eax, %ebx;
	movl 4+4(%ebp), %edi;
	movl 4+8(%ebp), %esi;
	movl 4+12(%ebp), %edx;
	movl 4+16(%ebp), %ecx;
	movl 4+20(%ebp), %eax;

/* Advance tweak(s) in `tweak` by `shift` doublings, i.e. multiply by
 * x^(shift) in GF(2^128): shift the 64-bit lanes left, then fold the
 * carried-out top bits back in via carry-less multiply against the
 * .Lxts_gfmul_clmul reduction constant.  `hi_tweak` must hold the
 * tweak's high bits pre-shuffled with .Lxts_high_bit_shuf; `tmp1` and
 * `tmp2` are clobbered.  Works on both xmm (one tweak) and ymm (a pair
 * of tweaks) operands. */
#define tweak_clmul(shift, out, tweak, hi_tweak, tmp1, tmp2) \
	vpsrld $(32-(shift)), hi_tweak, tmp2; \
	vpsllq $(shift), tweak, out; \
	vpclmulqdq $0, CADDR(.Lxts_gfmul_clmul, %ebx), tmp2, tmp1; \
	vpunpckhqdq tmp2, tmp1, tmp1; \
	vpxor tmp1, out, out;

	/* Prepare tweak: load the input tweak and derive the next one so
	 * that %ymm7 carries the tweak pair for two consecutive blocks;
	 * %ymm6 mirrors its shuffled high bits for tweak_clmul. */
	vmovdqu (%esi), %xmm7;
	vpshufb CADDR(.Lxts_high_bit_shuf, %ebx), %xmm7, %xmm6;
	tweak_clmul(1, %xmm5, %xmm7, %xmm6, %xmm0, %xmm1);
	vinserti128 $1, %xmm5, %ymm7, %ymm7; /* tweak:tweak1 */
	vpshufb CADDR(.Lxts_high_bit_shuf, %ebx), %ymm7, %ymm6;

	/* Process eight blocks per loop. */
.align 8
.Lxts_crypt_blk8:
	cmpl $8, %eax;
	jb .Lxts_crypt_blk4;

	leal -8(%eax), %eax;

	/* Spill tweak pairs for blocks 0..7 into the four stack slots and
	 * step %ymm7 forward by eight blocks for the next iteration. */
	vmovdqa %ymm7, (0 * 32)(%esp);
	tweak_clmul(2, %ymm2, %ymm7, %ymm6, %ymm0, %ymm1);
	vmovdqa %ymm2, (1 * 32)(%esp);
	tweak_clmul(4, %ymm2, %ymm7, %ymm6, %ymm0, %ymm1);
	vmovdqa %ymm2, (2 * 32)(%esp);
	tweak_clmul(6, %ymm2, %ymm7, %ymm6, %ymm0, %ymm1);
	vmovdqa %ymm2, (3 * 32)(%esp);
	tweak_clmul(8, %ymm7, %ymm7, %ymm6, %ymm0, %ymm1);
	vpshufb CADDR(.Lxts_high_bit_shuf, %ebx), %ymm7, %ymm6;

	/* Xor tweaks into the eight input blocks. */
	vbroadcasti128 (0 * 16)(%edi), %ymm4;
	vmovdqa (0 * 32)(%esp), %ymm0;
	vmovdqa (1 * 32)(%esp), %ymm1;
	vmovdqa (2 * 32)(%esp), %ymm2;
	vmovdqa (3 * 32)(%esp), %ymm3;
	vpxor (0 * 16)(%ecx), %ymm0, %ymm0;
	vpxor (2 * 16)(%ecx), %ymm1, %ymm1;
	vpxor (4 * 16)(%ecx), %ymm2, %ymm2;
	vpxor (6 * 16)(%ecx), %ymm3, %ymm3;

	leal (8 * 16)(%ecx), %ecx;

	cmpl $1, 4+28(%ebp);
	jne .Lxts_dec_blk8;
	/* AES rounds */
	XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (1 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (2 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (3 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (4 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (5 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (6 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (7 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (8 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (9 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (10 * 16)(%edi), %ymm4;
	/* nrounds dispatch: <12 -> AES-128 (10 rounds), ==12 -> AES-192,
	 * >12 -> AES-256.  The jz below reuses this cmpl's flags — the
	 * intervening vector instructions do not touch EFLAGS. */
	cmpl $12, 4+24(%ebp);
	jb .Lxts_enc_blk8_last;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (11 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (12 * 16)(%edi), %ymm4;
	jz .Lxts_enc_blk8_last;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (13 * 16)(%edi), %ymm4;
	VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (14 * 16)(%edi), %ymm4;

	/* Last round and output handling. */
  .Lxts_enc_blk8_last:
	vpxor (0 * 32)(%esp), %ymm4, %ymm5; /* Xor tweak to last round key. */
	vaesenclast %ymm5, %ymm0, %ymm0;
	vpxor (1 * 32)(%esp), %ymm4, %ymm5;
	vaesenclast %ymm5, %ymm1, %ymm1;
	vpxor (2 * 32)(%esp), %ymm4, %ymm5;
	vpxor (3 * 32)(%esp), %ymm4, %ymm4;
	vaesenclast %ymm5, %ymm2, %ymm2;
	vaesenclast %ymm4, %ymm3, %ymm3;
	vmovdqu %ymm0, (0 * 16)(%edx);
	vmovdqu %ymm1, (2 * 16)(%edx);
	vmovdqu %ymm2, (4 * 16)(%edx);
	vmovdqu %ymm3, (6 * 16)(%edx);
	leal (8 * 16)(%edx), %edx;

	jmp .Lxts_crypt_blk8;

  .align 8
  .Lxts_dec_blk8:
	/* AES rounds */
	XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (1 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (2 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (3 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (4 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (5 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (6 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (7 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (8 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (9 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (10 * 16)(%edi), %ymm4;
	/* Key-length dispatch, same flag reuse as the encrypt path. */
	cmpl $12, 4+24(%ebp);
	jb .Lxts_dec_blk8_last;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (11 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (12 * 16)(%edi), %ymm4;
	jz .Lxts_dec_blk8_last;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (13 * 16)(%edi), %ymm4;
	VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
	vbroadcasti128 (14 * 16)(%edi), %ymm4;

	/* Last round and output handling. */
  .Lxts_dec_blk8_last:
	vpxor (0 * 32)(%esp), %ymm4, %ymm5; /* Xor tweak to last round key. */
	vaesdeclast %ymm5, %ymm0, %ymm0;
	vpxor (1 * 32)(%esp), %ymm4, %ymm5;
	vaesdeclast %ymm5, %ymm1, %ymm1;
	vpxor (2 * 32)(%esp), %ymm4, %ymm5;
	vpxor (3 * 32)(%esp), %ymm4, %ymm4;
	vaesdeclast %ymm5, %ymm2, %ymm2;
	vaesdeclast %ymm4, %ymm3, %ymm3;
	vmovdqu %ymm0, (0 * 16)(%edx);
	vmovdqu %ymm1, (2 * 16)(%edx);
	vmovdqu %ymm2, (4 * 16)(%edx);
	vmovdqu %ymm3, (6 * 16)(%edx);
	leal (8 * 16)(%edx), %edx;

	jmp .Lxts_crypt_blk8;

	/* Handle trailing four blocks. */
.align 8
.Lxts_crypt_blk4:
	/* Try exit early as typically input length is large power of 2. */
	cmpl $1, %eax;
	jb .Ldone_xts_crypt;
	cmpl $4, %eax;
	jb .Lxts_crypt_blk1;

	leal -4(%eax), %eax;

	/* Keep the four tweaks in %ymm2/%ymm3 (no spill needed) and step
	 * %ymm7 forward by four blocks. */
	vmovdqa %ymm7, %ymm2;
	tweak_clmul(2, %ymm3, %ymm7, %ymm6, %ymm0, %ymm1);
	tweak_clmul(4, %ymm7, %ymm7, %ymm6, %ymm0, %ymm1);
	vpshufb CADDR(.Lxts_high_bit_shuf, %ebx), %ymm7, %ymm6;

	vbroadcasti128 (0 * 16)(%edi), %ymm4;
	vpxor (0 * 16)(%ecx), %ymm2, %ymm0;
	vpxor (2 * 16)(%ecx), %ymm3, %ymm1;

	leal (4 * 16)(%ecx), %ecx;

	cmpl $1, 4+28(%ebp);
	jne .Lxts_dec_blk4;
	/* AES rounds */
	XOR2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (1 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (2 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (3 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (4 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (5 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (6 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (7 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (8 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (9 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (10 * 16)(%edi), %ymm4;
	cmpl $12, 4+24(%ebp);
	jb .Lxts_enc_blk4_last;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (11 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (12 * 16)(%edi), %ymm4;
	jz .Lxts_enc_blk4_last;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (13 * 16)(%edi), %ymm4;
	VAESENC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (14 * 16)(%edi), %ymm4;

	/* Last round and output handling. */
  .Lxts_enc_blk4_last:
	vpxor %ymm4, %ymm2, %ymm2; /* Xor tweak to last round key. */
	vpxor %ymm4, %ymm3, %ymm3;
	vaesenclast %ymm2, %ymm0, %ymm0;
	vaesenclast %ymm3, %ymm1, %ymm1;
	vmovdqu %ymm0, (0 * 16)(%edx);
	vmovdqu %ymm1, (2 * 16)(%edx);
	leal (4 * 16)(%edx), %edx;

	jmp .Lxts_crypt_blk1;

  .align 8
  .Lxts_dec_blk4:
	/* AES rounds */
	XOR2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (1 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (2 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (3 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (4 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (5 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (6 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (7 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (8 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (9 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (10 * 16)(%edi), %ymm4;
	cmpl $12, 4+24(%ebp);
	jb .Lxts_dec_blk4_last;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (11 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (12 * 16)(%edi), %ymm4;
	jz .Lxts_dec_blk4_last;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (13 * 16)(%edi), %ymm4;
	VAESDEC2(%ymm4, %ymm0, %ymm1);
	vbroadcasti128 (14 * 16)(%edi), %ymm4;

	/* Last round and output handling. */
  .Lxts_dec_blk4_last:
	vpxor %ymm4, %ymm2, %ymm2; /* Xor tweak to last round key. */
	vpxor %ymm4, %ymm3, %ymm3;
	vaesdeclast %ymm2, %ymm0, %ymm0;
	vaesdeclast %ymm3, %ymm1, %ymm1;
	vmovdqu %ymm0, (0 * 16)(%edx);
	vmovdqu %ymm1, (2 * 16)(%edx);
	leal (4 * 16)(%edx), %edx;

	/* Process trailing one to three blocks, one per loop. */
.align 8
.Lxts_crypt_blk1:
	cmpl $1, %eax;
	jb .Ldone_xts_crypt;

	leal -1(%eax), %eax;

	/* %xmm5 keeps this block's tweak; %xmm7 is stepped to the next. */
	vpxor (%ecx), %xmm7, %xmm0;
	vmovdqa %xmm7, %xmm5;
	tweak_clmul(1, %xmm7, %xmm7, %xmm6, %xmm2, %xmm3);
	vpshufb CADDR(.Lxts_high_bit_shuf, %ebx), %xmm7, %xmm6;

	leal 16(%ecx), %ecx;

	cmpl $1, 4+28(%ebp);
	jne .Lxts_dec_blk1;
	/* AES rounds. */
	vpxor (0 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (1 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (2 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (3 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (4 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (5 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (6 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (7 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (8 * 16)(%edi), %xmm0, %xmm0;
	vaesenc (9 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (10 * 16)(%edi), %xmm1;
	cmpl $12, 4+24(%ebp);
	jb .Lxts_enc_blk1_last;
	vaesenc %xmm1, %xmm0, %xmm0;
	vaesenc (11 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (12 * 16)(%edi), %xmm1;
	jz .Lxts_enc_blk1_last;
	vaesenc %xmm1, %xmm0, %xmm0;
	vaesenc (13 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (14 * 16)(%edi), %xmm1;

	/* Last round and output handling. */
  .Lxts_enc_blk1_last:
	vpxor %xmm1, %xmm5, %xmm5; /* Xor tweak to last round key. */
	vaesenclast %xmm5, %xmm0, %xmm0;
	vmovdqu %xmm0, (%edx);
	leal 16(%edx), %edx;

	jmp .Lxts_crypt_blk1;

  .align 8
  .Lxts_dec_blk1:
	/* AES rounds. */
	vpxor (0 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (1 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (2 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (3 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (4 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (5 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (6 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (7 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (8 * 16)(%edi), %xmm0, %xmm0;
	vaesdec (9 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (10 * 16)(%edi), %xmm1;
	cmpl $12, 4+24(%ebp);
	jb .Lxts_dec_blk1_last;
	vaesdec %xmm1, %xmm0, %xmm0;
	vaesdec (11 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (12 * 16)(%edi), %xmm1;
	jz .Lxts_dec_blk1_last;
	vaesdec %xmm1, %xmm0, %xmm0;
	vaesdec (13 * 16)(%edi), %xmm0, %xmm0;
	vmovdqa (14 * 16)(%edi), %xmm1;

	/* Last round and output handling. */
  .Lxts_dec_blk1_last:
	vpxor %xmm1, %xmm5, %xmm5; /* Xor tweak to last round key. */
	vaesdeclast %xmm5, %xmm0, %xmm0;
	vmovdqu %xmm0, (%edx);
	leal 16(%edx), %edx;

	jmp .Lxts_crypt_blk1;

.align 8
.Ldone_xts_crypt:
	/* Store IV.
*/ + vmovdqu %xmm7, (%esi); + + vpxor %ymm0, %ymm0, %ymm0; + movl (4 * 32 + 0 * 4)(%esp), %edi; + CFI_RESTORE(edi); + movl (4 * 32 + 1 * 4)(%esp), %esi; + CFI_RESTORE(esi); + movl (4 * 32 + 2 * 4)(%esp), %ebx; + CFI_RESTORE(ebx); + vmovdqa %ymm0, (0 * 32)(%esp); + vmovdqa %ymm0, (1 * 32)(%esp); + vmovdqa %ymm0, (2 * 32)(%esp); + vmovdqa %ymm0, (3 * 32)(%esp); + leave; + CFI_LEAVE(); + vzeroall; + xorl %eax, %eax; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_xts_crypt_i386), + .-SYM_NAME(_gcry_vaes_avx2_xts_crypt_i386)) + +/********************************************************************** + ECB-mode encryption + **********************************************************************/ +ELF(.type SYM_NAME(_gcry_vaes_avx2_ecb_crypt_i386),@function) +.globl SYM_NAME(_gcry_vaes_avx2_ecb_crypt_i386) +.align 16 +SYM_NAME(_gcry_vaes_avx2_ecb_crypt_i386): + /* input: + * (esp + 4): round keys + * (esp + 8): encrypt + * (esp + 12): dst + * (esp + 16): src + * (esp + 20): nblocks + * (esp + 24): nrounds + */ + CFI_STARTPROC(); + pushl %edi; + CFI_PUSH(%edi); + pushl %esi; + CFI_PUSH(%esi); + + movl 8+4(%esp), %edi; + movl 8+8(%esp), %esi; + movl 8+12(%esp), %edx; + movl 8+16(%esp), %ecx; + movl 8+20(%esp), %eax; + + /* Process 8 blocks per loop. */ +.align 8 +.Lecb_blk8: + cmpl $8, %eax; + jb .Lecb_blk4; + + leal -8(%eax), %eax; + + /* Load input and xor first key. 
*/ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm0; + vmovdqu (2 * 16)(%ecx), %ymm1; + vmovdqu (4 * 16)(%ecx), %ymm2; + vmovdqu (6 * 16)(%ecx), %ymm3; + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vpxor %ymm4, %ymm2, %ymm2; + vpxor %ymm4, %ymm3, %ymm3; + vbroadcasti128 (1 * 16)(%edi), %ymm4; + leal (8 * 16)(%ecx), %ecx; + + testl %esi, %esi; + jz .Lecb_dec_blk8; + /* AES rounds */ + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lecb_enc_blk8_last; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lecb_enc_blk8_last; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + .Lecb_enc_blk8_last: + vaesenclast %ymm4, %ymm0, %ymm0; + vaesenclast %ymm4, %ymm1, %ymm1; + vaesenclast %ymm4, %ymm2, %ymm2; + vaesenclast %ymm4, %ymm3, %ymm3; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + leal (8 * 16)(%edx), %edx; + jmp .Lecb_blk8; + + .align 8 + 
.Lecb_dec_blk8: + /* AES rounds */ + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lecb_dec_blk8_last; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lecb_dec_blk8_last; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + .Lecb_dec_blk8_last: + vaesdeclast %ymm4, %ymm0, %ymm0; + vaesdeclast %ymm4, %ymm1, %ymm1; + vaesdeclast %ymm4, %ymm2, %ymm2; + vaesdeclast %ymm4, %ymm3, %ymm3; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + vmovdqu %ymm2, (4 * 16)(%edx); + vmovdqu %ymm3, (6 * 16)(%edx); + leal (8 * 16)(%edx), %edx; + jmp .Lecb_blk8; + + /* Handle trailing four blocks. */ +.align 8 +.Lecb_blk4: + cmpl $4, %eax; + jb .Lecb_blk1; + + leal -4(%eax), %eax; + + /* Load input and xor first key. 
*/ + vbroadcasti128 (0 * 16)(%edi), %ymm4; + vmovdqu (0 * 16)(%ecx), %ymm0; + vmovdqu (2 * 16)(%ecx), %ymm1; + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm4, %ymm1, %ymm1; + vbroadcasti128 (1 * 16)(%edi), %ymm4; + leal (4 * 16)(%ecx), %ecx; + + testl %esi, %esi; + jz .Lecb_dec_blk4; + /* AES rounds */ + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (7 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lecb_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lecb_enc_blk4_last; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESENC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + .Lecb_enc_blk4_last: + vaesenclast %ymm4, %ymm0, %ymm0; + vaesenclast %ymm4, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + jmp .Lecb_blk1; + + .align 8 + .Lecb_dec_blk4: + /* AES rounds */ + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (2 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (3 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (4 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (5 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (6 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 
(7 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (8 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (9 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (10 * 16)(%edi), %ymm4; + cmpl $12, 8+24(%esp); + jb .Lecb_dec_blk4_last; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (11 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (12 * 16)(%edi), %ymm4; + jz .Lecb_dec_blk4_last; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (13 * 16)(%edi), %ymm4; + VAESDEC2(%ymm4, %ymm0, %ymm1); + vbroadcasti128 (14 * 16)(%edi), %ymm4; + .Lecb_dec_blk4_last: + vaesdeclast %ymm4, %ymm0, %ymm0; + vaesdeclast %ymm4, %ymm1, %ymm1; + vmovdqu %ymm0, (0 * 16)(%edx); + vmovdqu %ymm1, (2 * 16)(%edx); + leal (4 * 16)(%edx), %edx; + + /* Process trailing one to three blocks, one per loop. */ +.align 8 +.Lecb_blk1: + cmpl $1, %eax; + jb .Ldone_ecb; + + leal -1(%eax), %eax; + + /* Load input. */ + vmovdqu (%ecx), %xmm2; + leal 16(%ecx), %ecx; + + /* Xor first key. */ + vpxor (0 * 16)(%edi), %xmm2, %xmm0; + + testl %esi, %esi; + jz .Lecb_dec_blk1; + /* AES rounds. */ + vaesenc (1 * 16)(%edi), %xmm0, %xmm0; + vaesenc (2 * 16)(%edi), %xmm0, %xmm0; + vaesenc (3 * 16)(%edi), %xmm0, %xmm0; + vaesenc (4 * 16)(%edi), %xmm0, %xmm0; + vaesenc (5 * 16)(%edi), %xmm0, %xmm0; + vaesenc (6 * 16)(%edi), %xmm0, %xmm0; + vaesenc (7 * 16)(%edi), %xmm0, %xmm0; + vaesenc (8 * 16)(%edi), %xmm0, %xmm0; + vaesenc (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 8+24(%esp); + jb .Lecb_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lecb_enc_blk1_last; + vaesenc %xmm1, %xmm0, %xmm0; + vaesenc (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + .Lecb_enc_blk1_last: + vaesenclast %xmm1, %xmm0, %xmm0; + jmp .Lecb_blk1_end; + + .align 8 + .Lecb_dec_blk1: + /* AES rounds. 
*/ + vaesdec (1 * 16)(%edi), %xmm0, %xmm0; + vaesdec (2 * 16)(%edi), %xmm0, %xmm0; + vaesdec (3 * 16)(%edi), %xmm0, %xmm0; + vaesdec (4 * 16)(%edi), %xmm0, %xmm0; + vaesdec (5 * 16)(%edi), %xmm0, %xmm0; + vaesdec (6 * 16)(%edi), %xmm0, %xmm0; + vaesdec (7 * 16)(%edi), %xmm0, %xmm0; + vaesdec (8 * 16)(%edi), %xmm0, %xmm0; + vaesdec (9 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (10 * 16)(%edi), %xmm1; + cmpl $12, 8+24(%esp); + jb .Lecb_dec_blk1_last; + vaesdec %xmm1, %xmm0, %xmm0; + vaesdec (11 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (12 * 16)(%edi), %xmm1; + jz .Lecb_dec_blk1_last; + vaesdec %xmm1, %xmm0, %xmm0; + vaesdec (13 * 16)(%edi), %xmm0, %xmm0; + vmovdqa (14 * 16)(%edi), %xmm1; + .Lecb_dec_blk1_last: + vaesdeclast %xmm1, %xmm0, %xmm0; + jmp .Lecb_blk1_end; + + .align 8 + .Lecb_blk1_end: + vmovdqu %xmm0, (%edx); + leal 16(%edx), %edx; + + jmp .Lecb_blk1; + +.align 8 +.Ldone_ecb: + popl %esi; + CFI_POP(%esi); + popl %edi; + CFI_POP(%edi); + vzeroall; + ret_spec_stop + CFI_ENDPROC(); +ELF(.size SYM_NAME(_gcry_vaes_avx2_ecb_crypt_i386), + .-SYM_NAME(_gcry_vaes_avx2_ecb_crypt_i386)) + +/********************************************************************** + constants + **********************************************************************/ +SECTION_RODATA + +ELF(.type SYM_NAME(_gcry_vaes_consts),@object) +.align 32 +SYM_NAME(_gcry_vaes_consts): +.Lbige_addb_0: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lbige_addb_1: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 +.Lbige_addb_2: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 +.Lbige_addb_3: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 +.Lbige_addb_4: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 +.Lbige_addb_5: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 +.Lbige_addb_6: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 +.Lbige_addb_7: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 +.Lbige_addb_8: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 
+.Lbige_addb_9: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 +.Lbige_addb_10: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10 +.Lbige_addb_11: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11 + +.Lle_addd_0: + .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_1: + .byte 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_2: + .byte 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_3: + .byte 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_4: + .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_5: + .byte 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_6: + .byte 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_7: + .byte 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_8: + .byte 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_9: + .byte 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_10: + .byte 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_11: + .byte 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +.Lle_addd_4_2: + .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +.Lle_addd_12_2: + .byte 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + .byte 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +.Lxts_gfmul_clmul: + .long 0x00, 0x87, 0x00, 0x00 + .long 0x00, 0x87, 0x00, 0x00 +.Lxts_high_bit_shuf: + .byte -1, -1, -1, -1, 12, 13, 14, 15 + .byte 4, 5, 6, 7, -1, -1, -1, -1 + .byte -1, -1, -1, -1, 12, 13, 14, 15 + .byte 4, 5, 6, 7, -1, -1, -1, -1 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +ELF(.size SYM_NAME(_gcry_vaes_consts),.-SYM_NAME(_gcry_vaes_consts)) + +#endif /* HAVE_GCC_INLINE_ASM_VAES */ +#endif /* __i386__ */ diff --git a/cipher/rijndael-vaes.c b/cipher/rijndael-vaes-i386.c similarity index 55% copy from cipher/rijndael-vaes.c copy to cipher/rijndael-vaes-i386.c index 
ce9e18e7..e10d3ac7 100644 --- a/cipher/rijndael-vaes.c +++ b/cipher/rijndael-vaes-i386.c @@ -1,240 +1,231 @@ -/* VAES/AVX2 accelerated AES for Libgcrypt - * Copyright (C) 2021 Jussi Kivilinna +/* VAES/AVX2 i386 accelerated AES for Libgcrypt + * Copyright (C) 2023 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * */ #include #include #include #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "rijndael-internal.h" #include "./cipher-internal.h" -#ifdef USE_VAES - - -# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS -# define ASM_FUNC_ABI __attribute__((sysv_abi)) -# else -# define ASM_FUNC_ABI -# endif - - -extern void _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx); - - -extern void _gcry_vaes_avx2_cbc_dec_amd64 (const void *keysched, - unsigned char *iv, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds) ASM_FUNC_ABI; - -extern void _gcry_vaes_avx2_cfb_dec_amd64 (const void *keysched, - unsigned char *iv, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds) ASM_FUNC_ABI; - -extern void _gcry_vaes_avx2_ctr_enc_amd64 (const void *keysched, - unsigned char *ctr, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds) ASM_FUNC_ABI; - -extern void _gcry_vaes_avx2_ctr32le_enc_amd64 
(const void *keysched, - unsigned char *ctr, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds) - ASM_FUNC_ABI; - -extern size_t _gcry_vaes_avx2_ocb_crypt_amd64 (const void *keysched, - unsigned int blkn, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds, - unsigned char *offset, - unsigned char *checksum, - unsigned char *L_table, - int encrypt) ASM_FUNC_ABI; - -extern void _gcry_vaes_avx2_xts_crypt_amd64 (const void *keysched, - unsigned char *tweak, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds, - int encrypt) ASM_FUNC_ABI; - -extern void _gcry_vaes_avx2_ecb_crypt_amd64 (const void *keysched, - int encrypt, - void *outbuf_arg, - const void *inbuf_arg, - size_t nblocks, - unsigned int nrounds) ASM_FUNC_ABI; +#ifdef USE_VAES_I386 + + +extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); + + +extern void _gcry_vaes_avx2_cbc_dec_i386 (const void *keysched, + unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds); + +extern void _gcry_vaes_avx2_cfb_dec_i386 (const void *keysched, + unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds); + +extern void _gcry_vaes_avx2_ctr_enc_i386 (const void *keysched, + unsigned char *ctr, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds); + +extern void _gcry_vaes_avx2_ctr32le_enc_i386 (const void *keysched, + unsigned char *ctr, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds); + +extern size_t _gcry_vaes_avx2_ocb_crypt_i386 (const void *keysched, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds, + unsigned char *offset, + unsigned char *checksum, + unsigned int blkn, + const void *L_table, + int enc_dec_or_auth); + +extern void _gcry_vaes_avx2_xts_crypt_i386 (const void *keysched, + unsigned 
char *tweak, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds, + int encrypt); + +extern void _gcry_vaes_avx2_ecb_crypt_i386 (const void *keysched, + int encrypt, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, + unsigned int nrounds); void _gcry_aes_vaes_ecb_crypt (void *context, void *outbuf, const void *inbuf, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } - _gcry_vaes_avx2_ecb_crypt_amd64 (keysched, encrypt, outbuf, inbuf, + _gcry_vaes_avx2_ecb_crypt_i386 (keysched, encrypt, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } - _gcry_vaes_avx2_cbc_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); + _gcry_vaes_avx2_cbc_dec_i386 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; - _gcry_vaes_avx2_cfb_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); + _gcry_vaes_avx2_cfb_dec_i386 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; - _gcry_vaes_avx2_ctr_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); 
+ _gcry_vaes_avx2_ctr_enc_i386 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; - _gcry_vaes_avx2_ctr32le_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks, + _gcry_vaes_avx2_ctr32le_enc_i386 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; - unsigned int nrounds = ctx->rounds; - u64 blkn = c->u_mode.ocb.data_nblocks; + u64 blkn; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } + blkn = c->u_mode.ocb.data_nblocks; c->u_mode.ocb.data_nblocks = blkn + nblocks; - return _gcry_vaes_avx2_ocb_crypt_amd64 (keysched, (unsigned int)blkn, outbuf, - inbuf, nblocks, nrounds, c->u_iv.iv, - c->u_ctr.ctr, c->u_mode.ocb.L[0], - encrypt); + return _gcry_vaes_avx2_ocb_crypt_i386 (keysched, outbuf, inbuf, nblocks, + ctx->rounds, c->u_iv.iv, c->u_ctr.ctr, + (unsigned int)blkn, + &c->u_mode.ocb.L[0], encrypt); } size_t _gcry_aes_vaes_ocb_auth (gcry_cipher_hd_t c, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const void *keysched = ctx->keyschenc32; const unsigned char *inbuf = inbuf_arg; - unsigned int nrounds = ctx->rounds; u64 blkn = c->u_mode.ocb.aad_nblocks; c->u_mode.ocb.aad_nblocks = blkn + nblocks; - return _gcry_vaes_avx2_ocb_crypt_amd64 (keysched, (unsigned int)blkn, NULL, - inbuf, nblocks, nrounds, - c->u_mode.ocb.aad_offset, - c->u_mode.ocb.aad_sum, - c->u_mode.ocb.L[0], 2); + return _gcry_vaes_avx2_ocb_crypt_i386 (keysched, 
NULL, inbuf, nblocks, + ctx->rounds, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, + (unsigned int)blkn, + &c->u_mode.ocb.L[0], 2); } void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak, void *outbuf, const void *inbuf, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } - _gcry_vaes_avx2_xts_crypt_amd64 (keysched, tweak, outbuf, inbuf, nblocks, + _gcry_vaes_avx2_xts_crypt_i386 (keysched, tweak, outbuf, inbuf, nblocks, nrounds, encrypt); } -#endif /* USE_VAES */ +#endif /* USE_VAES_I386 */ diff --git a/cipher/rijndael-vaes.c b/cipher/rijndael-vaes.c index ce9e18e7..478904d0 100644 --- a/cipher/rijndael-vaes.c +++ b/cipher/rijndael-vaes.c @@ -1,240 +1,240 @@ -/* VAES/AVX2 accelerated AES for Libgcrypt +/* VAES/AVX2 AMD64 accelerated AES for Libgcrypt * Copyright (C) 2021 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
* */ #include #include #include #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_VAES # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define ASM_FUNC_ABI __attribute__((sysv_abi)) # else # define ASM_FUNC_ABI # endif extern void _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx); extern void _gcry_vaes_avx2_cbc_dec_amd64 (const void *keysched, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds) ASM_FUNC_ABI; extern void _gcry_vaes_avx2_cfb_dec_amd64 (const void *keysched, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds) ASM_FUNC_ABI; extern void _gcry_vaes_avx2_ctr_enc_amd64 (const void *keysched, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds) ASM_FUNC_ABI; extern void _gcry_vaes_avx2_ctr32le_enc_amd64 (const void *keysched, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds) ASM_FUNC_ABI; extern size_t _gcry_vaes_avx2_ocb_crypt_amd64 (const void *keysched, unsigned int blkn, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds, unsigned char *offset, unsigned char *checksum, unsigned char *L_table, - int encrypt) ASM_FUNC_ABI; + int enc_dec_auth) ASM_FUNC_ABI; extern void _gcry_vaes_avx2_xts_crypt_amd64 (const void *keysched, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds, int encrypt) ASM_FUNC_ABI; extern void _gcry_vaes_avx2_ecb_crypt_amd64 (const void *keysched, int encrypt, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, unsigned int nrounds) ASM_FUNC_ABI; void _gcry_aes_vaes_ecb_crypt (void *context, void *outbuf, const void *inbuf, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; const void *keysched = encrypt ? 
ctx->keyschenc32 : ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } _gcry_vaes_avx2_ecb_crypt_amd64 (keysched, encrypt, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } _gcry_vaes_avx2_cbc_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; _gcry_vaes_avx2_cfb_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; _gcry_vaes_avx2_ctr_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } void _gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks) { RIJNDAEL_context *ctx = context; const void *keysched = ctx->keyschenc32; unsigned int nrounds = ctx->rounds; _gcry_vaes_avx2_ctr32le_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks, nrounds); } size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = (void *)&c->context.c; const void *keysched = encrypt ? 
ctx->keyschenc32 : ctx->keyschdec32; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int nrounds = ctx->rounds; u64 blkn = c->u_mode.ocb.data_nblocks; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } c->u_mode.ocb.data_nblocks = blkn + nblocks; return _gcry_vaes_avx2_ocb_crypt_amd64 (keysched, (unsigned int)blkn, outbuf, inbuf, nblocks, nrounds, c->u_iv.iv, c->u_ctr.ctr, c->u_mode.ocb.L[0], encrypt); } size_t _gcry_aes_vaes_ocb_auth (gcry_cipher_hd_t c, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const void *keysched = ctx->keyschenc32; const unsigned char *inbuf = inbuf_arg; unsigned int nrounds = ctx->rounds; u64 blkn = c->u_mode.ocb.aad_nblocks; c->u_mode.ocb.aad_nblocks = blkn + nblocks; return _gcry_vaes_avx2_ocb_crypt_amd64 (keysched, (unsigned int)blkn, NULL, inbuf, nblocks, nrounds, c->u_mode.ocb.aad_offset, c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0], 2); } void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak, void *outbuf, const void *inbuf, size_t nblocks, int encrypt) { RIJNDAEL_context *ctx = context; const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; unsigned int nrounds = ctx->rounds; if (!encrypt && !ctx->decryption_prepared) { _gcry_aes_aesni_prepare_decryption (ctx); ctx->decryption_prepared = 1; } _gcry_vaes_avx2_xts_crypt_amd64 (keysched, tweak, outbuf, inbuf, nblocks, nrounds, encrypt); } #endif /* USE_VAES */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 56acb199..f1683007 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,2021 +1,2036 @@ /* Rijndael (AES) for GnuPG * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. 
* * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . ******************************************************************* * The code here is based on the optimized implementation taken from * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, * which carries this notice: *------------------------------------------ * rijndael-alg-fst.c v2.3 April '2000 * * Optimised ANSI C code * * authors: v1.0: Antoon Bosselaers * v2.0: Vincent Rijmen * v2.3: Paulo Barreto * * This code is placed in the public domain. 
*------------------------------------------ * * The SP800-38a document is available at: * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf * */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AMD64_ASM /* AMD64 assembly implementations of AES */ extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_AMD64_ASM*/ #ifdef USE_AESNI /* AES-NI (AMD64 & i386) accelerated implementations of AES */ extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv, void 
*outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_aesni_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif -#ifdef USE_VAES -/* VAES (AMD64) accelerated implementation of AES */ +#if defined(USE_VAES_I386) || defined(USE_VAES) +/* VAES (i386/AMD64) accelerated implementation of AES */ extern void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_vaes_ocb_auth (gcry_cipher_hd_t c, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_vaes_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif #ifdef USE_SSSE3 /* SSSE3 (AMD64) vector permutation implementation of AES */ extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); 
extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); #endif #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); extern void _gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx); #endif #ifdef USE_ARM_ASM /* ARM assembly implementations of AES */ extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds, const void *encT); extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds, const void *decT); #endif /*USE_ARM_ASM*/ #ifdef USE_ARM_CE /* ARMv8 Crypto Extension implementations of AES */ extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, 
const byte *key); extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_armv8_ce_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ #ifdef USE_PPC_CRYPTO /* PowerPC Crypto implementations of AES */ extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const 
unsigned char *src); extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ppc8_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ppc8_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); #endif /*USE_PPC_CRYPTO*/ #ifdef USE_PPC_CRYPTO_WITH_PPC9LE /* Power9 little-endian crypto implementations of AES */ extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern void _gcry_aes_ppc9le_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv, void 
*outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); extern void _gcry_aes_ppc9le_ctr32le_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); extern size_t _gcry_aes_p10le_gcm_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); #endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_S390X_CRYPTO /* zSeries crypto implementations of AES */ extern int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx, unsigned int keylen, unsigned int hwfeatures, cipher_bulk_ops_t *bulk_ops); extern void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key); extern void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx); extern unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); extern unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); #endif /*USE_S390X_CRYPTO*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned 
char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf, const void *inbuf, size_t nblocks); static void _gcry_aes_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac); static void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); static size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); static size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); static void _gcry_aes_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt); /* All the numbers. */ #include "rijndael-tables.h" /* Function prototypes. */ static const char *selftest(void); static void prepare_decryption(RIJNDAEL_context *ctx); /* Prefetching for encryption/decryption tables. */ static inline void prefetch_table(const volatile byte *tab, size_t len) { size_t i; for (i = 0; len - i >= 8 * 32; i += 8 * 32) { (void)tab[i + 0 * 32]; (void)tab[i + 1 * 32]; (void)tab[i + 2 * 32]; (void)tab[i + 3 * 32]; (void)tab[i + 4 * 32]; (void)tab[i + 5 * 32]; (void)tab[i + 6 * 32]; (void)tab[i + 7 * 32]; } for (; i < len; i += 32) { (void)tab[i]; } (void)tab[len - 1]; } static void prefetch_enc(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. 
Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ enc_tables.counter_head++; enc_tables.counter_tail++; /* Prefetch look-up tables to cache. */ prefetch_table((const void *)&enc_tables, sizeof(enc_tables)); } static void prefetch_dec(void) { /* Modify counters to trigger copy-on-write and unsharing if physical pages * of look-up table are shared between processes. Modifying counters also * causes checksums for pages to change and hint same-page merging algorithm * that these pages are frequently changing. */ dec_tables.counter_head++; dec_tables.counter_tail++; /* Prefetch look-up tables to cache. */ prefetch_table((const void *)&dec_tables, sizeof(dec_tables)); } static inline u32 sbox4(u32 inb4) { u32 out; out = (encT[(inb4 >> 0) & 0xffU] & 0xff00U) >> 8; out |= (encT[(inb4 >> 8) & 0xffU] & 0xff00U) >> 0; out |= (encT[(inb4 >> 16) & 0xffU] & 0xff0000U) << 0; out |= (encT[(inb4 >> 24) & 0xffU] & 0xff0000U) << 8; return out; } /* Perform the key setup. */ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, cipher_bulk_ops_t *bulk_ops) { static int initialized = 0; static const char *selftest_failed = 0; void (*hw_setkey)(RIJNDAEL_context *ctx, const byte *key) = NULL; int rounds; unsigned int KC; unsigned int hwfeatures; /* The on-the-fly self tests are only run in non-fips mode. In fips mode explicit self-tests are required. Actually the on-the-fly self-tests are not fully thread-safe and it might happen that a failed self-test won't get noticed in another thread. FIXME: We might want to have a central registry of succeeded self-tests. 
*/ if (!fips_mode () && !initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("%s\n", selftest_failed ); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if( keylen == 128/8 ) { rounds = 10; KC = 4; } else if ( keylen == 192/8 ) { rounds = 12; KC = 6; } else if ( keylen == 256/8 ) { rounds = 14; KC = 8; } else return GPG_ERR_INV_KEYLEN; ctx->rounds = rounds; hwfeatures = _gcry_get_hw_features (); ctx->decryption_prepared = 0; /* Setup default bulk encryption routines. */ memset (bulk_ops, 0, sizeof(*bulk_ops)); bulk_ops->cfb_enc = _gcry_aes_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_xts_crypt; (void)hwfeatures; if (0) { ; } #ifdef USE_AESNI else if (hwfeatures & HWF_INTEL_AESNI) { hw_setkey = _gcry_aes_aesni_do_setkey; ctx->encrypt_fn = _gcry_aes_aesni_encrypt; ctx->decrypt_fn = _gcry_aes_aesni_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_aesni_prepare_decryption; ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX); ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2); /* Setup AES-NI bulk encryption routines. */ bulk_ops->cfb_enc = _gcry_aes_aesni_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_aesni_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_aesni_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_aesni_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_aesni_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_aesni_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_aesni_ecb_crypt; #ifdef USE_VAES if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) && (hwfeatures & HWF_INTEL_AVX2)) { /* Setup VAES bulk encryption routines. 
*/ bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec; bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_vaes_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_vaes_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_vaes_ecb_crypt; } +#endif +#ifdef USE_VAES_I386 + if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) && + (hwfeatures & HWF_INTEL_AVX2)) + { + /* Setup VAES bulk encryption routines. */ + bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec; + bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec; + bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc; + bulk_ops->ctr32le_enc = _gcry_aes_vaes_ctr32le_enc; + bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt; + bulk_ops->ocb_auth = _gcry_aes_vaes_ocb_auth; + bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt; + bulk_ops->ecb_crypt = _gcry_aes_vaes_ecb_crypt; + } #endif } #endif #ifdef USE_PADLOCK else if ((hwfeatures & HWF_PADLOCK_AES) && keylen == 128/8) { ctx->encrypt_fn = _gcry_aes_padlock_encrypt; ctx->decrypt_fn = _gcry_aes_padlock_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_padlock_prepare_decryption; memcpy (ctx->padlockkey, key, keylen); } #endif #ifdef USE_SSSE3 else if (hwfeatures & HWF_INTEL_SSSE3) { hw_setkey = _gcry_aes_ssse3_do_setkey; ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ssse3_prepare_decryption; /* Setup SSSE3 bulk encryption routines. 
*/ bulk_ops->cfb_enc = _gcry_aes_ssse3_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ssse3_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ssse3_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ssse3_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ssse3_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ssse3_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ssse3_ocb_auth; } #endif #ifdef USE_ARM_CE else if (hwfeatures & HWF_ARM_AES) { hw_setkey = _gcry_aes_armv8_ce_setkey; ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt; ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_armv8_ce_prepare_decryption; /* Setup ARM-CE bulk encryption routines. */ bulk_ops->cfb_enc = _gcry_aes_armv8_ce_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_armv8_ce_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_armv8_ce_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_armv8_ce_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_armv8_ce_ctr_enc; bulk_ops->ctr32le_enc = _gcry_aes_armv8_ce_ctr32le_enc; bulk_ops->ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_armv8_ce_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_armv8_ce_xts_crypt; bulk_ops->ecb_crypt = _gcry_aes_armv8_ce_ecb_crypt; } #endif #ifdef USE_PPC_CRYPTO_WITH_PPC9LE else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00)) { hw_setkey = _gcry_aes_ppc8_setkey; ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt; ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC9LE bulk encryption routines. 
*/ bulk_ops->ecb_crypt = _gcry_aes_ppc9le_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc9le_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc9le_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc9le_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ppc9le_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ppc9le_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ppc9le_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ppc9le_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_ppc9le_xts_crypt; bulk_ops->ctr32le_enc = _gcry_aes_ppc9le_ctr32le_enc; if (hwfeatures & HWF_PPC_ARCH_3_10) /* for P10 */ bulk_ops->gcm_crypt = _gcry_aes_p10le_gcm_crypt; # ifdef ENABLE_FORCE_SOFT_HWFEATURES /* HWF_PPC_ARCH_3_10 above is used as soft HW-feature indicator for P10. * Actual implementation works with HWF_PPC_ARCH_3_00 also. */ if (hwfeatures & HWF_PPC_ARCH_3_00) bulk_ops->gcm_crypt = _gcry_aes_p10le_gcm_crypt; # endif } #endif #ifdef USE_PPC_CRYPTO else if (hwfeatures & HWF_PPC_VCRYPTO) { hw_setkey = _gcry_aes_ppc8_setkey; ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC8 bulk encryption routines. 
*/ bulk_ops->ecb_crypt = _gcry_aes_ppc8_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc8_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc8_cbc_enc; bulk_ops->cbc_dec = _gcry_aes_ppc8_cbc_dec; bulk_ops->ctr_enc = _gcry_aes_ppc8_ctr_enc; bulk_ops->ocb_crypt = _gcry_aes_ppc8_ocb_crypt; bulk_ops->ocb_auth = _gcry_aes_ppc8_ocb_auth; bulk_ops->xts_crypt = _gcry_aes_ppc8_xts_crypt; bulk_ops->ctr32le_enc = _gcry_aes_ppc8_ctr32le_enc; } #endif #ifdef USE_S390X_CRYPTO else if (_gcry_aes_s390x_setup_acceleration (ctx, keylen, hwfeatures, bulk_ops)) { hw_setkey = _gcry_aes_s390x_setkey; ctx->encrypt_fn = _gcry_aes_s390x_encrypt; ctx->decrypt_fn = _gcry_aes_s390x_decrypt; ctx->prefetch_enc_fn = NULL; ctx->prefetch_dec_fn = NULL; ctx->prepare_decryption = _gcry_aes_s390x_prepare_decryption; } #endif else { ctx->encrypt_fn = do_encrypt; ctx->decrypt_fn = do_decrypt; ctx->prefetch_enc_fn = prefetch_enc; ctx->prefetch_dec_fn = prefetch_dec; ctx->prepare_decryption = prepare_decryption; } /* NB: We don't yet support Padlock hardware key generation. 
*/ if (hw_setkey) { hw_setkey (ctx, key); } else { u32 W_prev; u32 *W_u32 = ctx->keyschenc32b; byte rcon = 1; unsigned int i, j; prefetch_enc(); for (i = 0; i < KC; i += 2) { W_u32[i + 0] = buf_get_le32(key + i * 4 + 0); W_u32[i + 1] = buf_get_le32(key + i * 4 + 4); } for (i = KC, j = KC, W_prev = W_u32[KC - 1]; i < 4 * (rounds + 1); i += 2, j += 2) { u32 temp0 = W_prev; u32 temp1; if (j == KC) { j = 0; temp0 = sbox4(rol(temp0, 24)) ^ rcon; rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } else if (KC == 8 && j == 4) { temp0 = sbox4(temp0); } temp1 = W_u32[i - KC + 0]; W_u32[i + 0] = temp0 ^ temp1; W_u32[i + 1] = W_u32[i - KC + 1] ^ temp0 ^ temp1; W_prev = W_u32[i + 1]; } } return 0; } static gcry_err_code_t rijndael_setkey (void *context, const byte *key, const unsigned keylen, cipher_bulk_ops_t *bulk_ops) { RIJNDAEL_context *ctx = context; return do_setkey (ctx, key, keylen, bulk_ops); } /* Make a decryption key from an encryption key. */ static void prepare_decryption( RIJNDAEL_context *ctx ) { const byte *sbox = ((const byte *)encT) + 1; int r; prefetch_enc(); prefetch_dec(); ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0]; ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1]; ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2]; ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3]; for (r = 1; r < ctx->rounds; r++) { u32 *wi = ctx->keyschenc32[r]; u32 *wo = ctx->keyschdec32[r]; u32 wt; wt = wi[0]; wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[1]; wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); wt = wi[2]; wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 
24) * 4]], 8 * 3); wt = wi[3]; wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0) ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1) ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2) ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3); } ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0]; ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1]; ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2]; ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3]; } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Encrypt one block. A and B may be the same. */ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, const unsigned char *a) { #define rk (ctx->keyschenc32) const byte *sbox = ((const byte *)encT) + 1; int rounds = ctx->rounds; int r; u32 sa[4]; u32 sb[4]; sb[0] = buf_get_le32(a + 0); sb[1] = buf_get_le32(a + 4); sb[2] = buf_get_le32(a + 8); sb[3] = buf_get_le32(a + 12); sa[0] = sb[0] ^ rk[0][0]; sa[1] = sb[1] ^ rk[0][1]; sa[2] = sb[2] ^ rk[0][2]; sa[3] = sb[3] ^ rk[0][3]; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[1][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[1][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[1][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[1][3] ^ sb[3]; for (r = 2; r < rounds; r++) { 
sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; r++; sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8)); sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8)); sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8)); sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8)); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8)); sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8)); sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8)); sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8)); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8)); sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8)); sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8)); sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8)); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8)); sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8)); sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8)); sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8)); sa[3] = rk[r][3] ^ sb[3]; } /* Last round is special. 
*/ sb[0] = ((u32)sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8); sb[3] = ((u32)sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8); sb[2] = ((u32)sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8); sb[1] = ((u32)sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8); sa[0] = rk[r][0] ^ sb[0]; sb[1] ^= ((u32)sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8); sb[3] ^= ((u32)sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8); sb[2] ^= ((u32)sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8); sa[1] = rk[r][1] ^ sb[1]; sb[2] ^= ((u32)sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8); sa[1] ^= ((u32)sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8); sb[3] ^= ((u32)sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8); sa[2] = rk[r][2] ^ sb[2]; sb[3] ^= ((u32)sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8); sa[2] ^= ((u32)sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8); sa[1] ^= ((u32)sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8); sa[0] ^= ((u32)sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8); sa[3] = rk[r][3] ^ sb[3]; buf_put_le32(b + 0, sa[0]); buf_put_le32(b + 4, sa[1]); buf_put_le32(b + 8, sa[2]); buf_put_le32(b + 12, sa[3]); #undef rk return (56 + 2*sizeof(int)); } #endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { #ifdef USE_AMD64_ASM return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, enc_tables.T); #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, enc_tables.T); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ } static unsigned int rijndael_encrypt (void *context, byte *b, const byte *a) { RIJNDAEL_context *ctx = context; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); return ctx->encrypt_fn (ctx, b, a); } /* Bulk encryption of complete blocks in CFB mode. 
Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_aes_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the IV. */ burn_depth = encrypt_fn (ctx, iv, iv); /* XOR the input with the IV and store input into IV. */ cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of cipher.c. */ static void _gcry_aes_cbc_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char *last_iv; unsigned int burn_depth = 0; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); last_iv = iv; for ( ;nblocks; nblocks-- ) { cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); burn_depth = encrypt_fn (ctx, outbuf, outbuf); last_iv = outbuf; inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } if (last_iv != iv) cipher_block_cpy (iv, last_iv, BLOCKSIZE); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } /* Bulk encryption of complete blocks in CTR mode. Caller needs to make sure that CTR is aligned on a 16 byte boundary if AESNI; the minimum alignment is for an u32. 
This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be of size BLOCKSIZE. */ static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned int burn_depth = 0; union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; if (ctx->prefetch_enc_fn) ctx->prefetch_enc_fn(); for ( ;nblocks; nblocks-- ) { /* Encrypt the counter. */ burn_depth = encrypt_fn (ctx, tmp.x1, ctr); /* XOR the input with the encrypted counter and store in output. */ cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; /* Increment the counter. */ cipher_block_add(ctr, 1, BLOCKSIZE); } wipememory(&tmp, sizeof(tmp)); if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } #if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) /* Decrypt one block. A and B may be the same. 
*/
/* Portable table-driven AES decryption of one 16-byte block from A into B
   (A and B may alias).  Uses the decryption key schedule (keyschdec32),
   which must have been prepared by prepare_decryption().  Returns a stack
   burn depth estimate for _gcry_burn_stack.  */
static unsigned int
do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
               const unsigned char *a)
{
#define rk (ctx->keyschdec32)
  int rounds = ctx->rounds;
  int r;
  u32 sa[4];
  u32 sb[4];

  /* Load the ciphertext block as four little-endian words.  */
  sb[0] = buf_get_le32(a + 0);
  sb[1] = buf_get_le32(a + 4);
  sb[2] = buf_get_le32(a + 8);
  sb[3] = buf_get_le32(a + 12);

  /* AddRoundKey with the last round key first (decryption runs the
     schedule backwards).  */
  sa[0] = sb[0] ^ rk[rounds][0];
  sa[1] = sb[1] ^ rk[rounds][1];
  sa[2] = sb[2] ^ rk[rounds][2];
  sa[3] = sb[3] ^ rk[rounds][3];

  /* Inner rounds via the decryption T-table, two rounds per loop pass.
     The unrolling and the sa/sb juggling mirror do_encrypt_fn.  */
  for (r = rounds - 1; r > 1; r--)
    {
      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];

      r--;

      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
      sa[0] = rk[r][0] ^ sb[0];

      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
      sa[1] = rk[r][1] ^ sb[1];

      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
      sa[2] = rk[r][2] ^ sb[2];

      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
      sa[3] = rk[r][3] ^ sb[3];
    }

  /* Round with rk[1], still via decT.  */
  sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
  sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
  sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
  sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
  sa[0] = rk[1][0] ^ sb[0];

  sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
  sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
  sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
  sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
  sa[1] = rk[1][1] ^ sb[1];

  sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
  sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
  sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
  sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
  sa[2] = rk[1][2] ^ sb[2];

  sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
  sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
  sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
  sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
  sa[3] = rk[1][3] ^ sb[3];

  /* Last round is special. */
  /* InvSubBytes via inv_sbox (no InvMixColumns), then final AddRoundKey
     with rk[0].  */
  sb[0] = (u32)inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8);
  sb[1] = (u32)inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8);
  sb[2] = (u32)inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8);
  sb[3] = (u32)inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8);
  sa[0] = sb[0] ^ rk[0][0];

  sb[1] ^= (u32)inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8);
  sb[2] ^= (u32)inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8);
  sb[3] ^= (u32)inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8);
  sa[0] ^= (u32)inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8);
  sa[1] = sb[1] ^ rk[0][1];

  sb[2] ^= (u32)inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8);
  sb[3] ^= (u32)inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8);
  sa[0] ^= (u32)inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8);
  sa[1] ^= (u32)inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8);
  sa[2] = sb[2] ^ rk[0][2];

  sb[3] ^= (u32)inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8);
  sa[0] ^= (u32)inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8);
  sa[1] ^= (u32)inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8);
  sa[2] ^= (u32)inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8);
  sa[3] = sb[3] ^ rk[0][3];

  /* Store the plaintext block.  */
  buf_put_le32(b + 0, sa[0]);
  buf_put_le32(b + 4, sa[1]);
  buf_put_le32(b + 8, sa[2]);
  buf_put_le32(b + 12, sa[3]);
#undef rk

  return (56+2*sizeof(int));
}
#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/


/* Decrypt one block.  AX and BX may be the same.
*/
/* Decrypt one block with the best compiled-in implementation:
   dedicated AMD64 or ARM assembler when configured, otherwise the
   generic C implementation do_decrypt_fn.  AX and BX may be the same
   buffer.  Returns the stack burn depth in bytes.  */
static unsigned int
do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
            const unsigned char *ax)
{
#ifdef USE_AMD64_ASM
  return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax,
                                       ctx->rounds, dec_tables.T);
#elif defined(USE_ARM_ASM)
  return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax,
                                     ctx->rounds, dec_tables.T);
#else
  return do_decrypt_fn (ctx, bx, ax);
#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
}


/* Lazily compute the decryption key schedule.  The work is done only
   once; PREPARE_DECRYPTION is the per-implementation hook installed
   at setkey time.  */
static inline void
check_decryption_preparation (RIJNDAEL_context *ctx)
{
  if ( !ctx->decryption_prepared )
    {
      ctx->prepare_decryption ( ctx );
      ctx->decryption_prepared = 1;
    }
}


/* Decrypt the 16 byte block A into B; cipher-spec entry point.
   Returns the stack burn depth in bytes.  */
static unsigned int
rijndael_decrypt (void *context, byte *b, const byte *a)
{
  RIJNDAEL_context *ctx = context;

  check_decryption_preparation (ctx);

  if (ctx->prefetch_dec_fn)
    ctx->prefetch_dec_fn();

  return ctx->decrypt_fn (ctx, b, a);
}


/* Bulk decryption of complete blocks in CFB mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c. */
static void
_gcry_aes_cfb_dec (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   size_t nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned int burn_depth = 0;
  /* CFB decryption uses the block-*encrypt* primitive.  */
  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;

  if (ctx->prefetch_enc_fn)
    ctx->prefetch_enc_fn();

  for ( ;nblocks; nblocks-- )
    {
      /* Keystream = E(IV); plaintext = keystream ^ ciphertext; the
         ciphertext becomes the next IV (xor-and-copy helper).  */
      burn_depth = encrypt_fn (ctx, iv, iv);

      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);

      outbuf += BLOCKSIZE;
      inbuf += BLOCKSIZE;
    }

  if (burn_depth)
    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
}


/* Bulk decryption of complete blocks in CBC mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c.
*/
static void
_gcry_aes_cbc_dec (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   size_t nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned int burn_depth = 0;
  unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
  rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;

  check_decryption_preparation (ctx);

  if (ctx->prefetch_dec_fn)
    ctx->prefetch_dec_fn();

  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      burn_depth = decrypt_fn (ctx, savebuf, inbuf);

      /* P_i = D(C_i) ^ IV; the current ciphertext block becomes the
         next IV (handled by the xor-and-copy helper).  */
      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
      inbuf += BLOCKSIZE;
      outbuf += BLOCKSIZE;
    }

  /* Clear plaintext remnants from the stack buffer.  */
  wipememory(savebuf, sizeof(savebuf));

  if (burn_depth)
    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
}


/* Bulk encryption/decryption of complete blocks in OCB mode. */
static size_t
_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                     const void *inbuf_arg, size_t nblocks, int encrypt)
{
  RIJNDAEL_context *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned int burn_depth = 0;

  if (encrypt)
    {
      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;

      if (ctx->prefetch_enc_fn)
        ctx->prefetch_enc_fn();

      for ( ;nblocks; nblocks-- )
        {
          u64 i = ++c->u_mode.ocb.data_nblocks;
          const unsigned char *l = ocb_get_l(c, i);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
          /* Checksum_i = Checksum_{i-1} xor P_i  */
          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
          burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);

          inbuf += BLOCKSIZE;
          outbuf += BLOCKSIZE;
        }
    }
  else
    {
      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
      rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;

      check_decryption_preparation (ctx);

      if (ctx->prefetch_dec_fn)
        ctx->prefetch_dec_fn();

      for ( ;nblocks; nblocks-- )
        {
          u64 i = ++c->u_mode.ocb.data_nblocks;
          const unsigned char *l = ocb_get_l(c, i);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
          /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i); note the
             checksum is updated with the recovered plaintext *after*
             decryption, mirroring the encrypt branch.  */
          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
          burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
          /* Checksum_i = Checksum_{i-1} xor P_i  */
          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);

          inbuf += BLOCKSIZE;
          outbuf += BLOCKSIZE;
        }
    }

  if (burn_depth)
    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));

  return 0;
}

/* Bulk authentication of complete blocks in OCB mode.
*/
static size_t
_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
{
  RIJNDAEL_context *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  unsigned int burn_depth = 0;
  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;

  if (ctx->prefetch_enc_fn)
    ctx->prefetch_enc_fn();

  for ( ;nblocks; nblocks-- )
    {
      u64 i = ++c->u_mode.ocb.aad_nblocks;
      const unsigned char *l = ocb_get_l(c, i);

      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
      cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE);
      burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);

      abuf += BLOCKSIZE;
    }

  /* Erase the last processed AAD block from the stack.  */
  wipememory(&l_tmp, sizeof(l_tmp));

  if (burn_depth)
    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));

  return 0;
}


/* Bulk encryption/decryption of complete blocks in XTS mode. */
static void
_gcry_aes_xts_crypt (void *context, unsigned char *tweak,
                     void *outbuf_arg, const void *inbuf_arg,
                     size_t nblocks, int encrypt)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned int burn_depth = 0;
  rijndael_cryptfn_t crypt_fn;
  u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;

  if (encrypt)
    {
      if (ctx->prefetch_enc_fn)
        ctx->prefetch_enc_fn();

      crypt_fn = ctx->encrypt_fn;
    }
  else
    {
      check_decryption_preparation (ctx);

      if (ctx->prefetch_dec_fn)
        ctx->prefetch_dec_fn();

      crypt_fn = ctx->decrypt_fn;
    }

  /* Keep the tweak as two little-endian 64-bit halves.  */
  tweak_next_lo = buf_get_le64 (tweak + 0);
  tweak_next_hi = buf_get_le64 (tweak + 8);

  while (nblocks)
    {
      tweak_lo = tweak_next_lo;
      tweak_hi = tweak_next_hi;

      /* Xor-Encrypt/Decrypt-Xor block. */
      tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo;
      tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi;

      buf_put_le64 (outbuf + 0, tmp_lo);
      buf_put_le64 (outbuf + 8, tmp_hi);

      /* Generate next tweak: doubling in GF(2^128), i.e. shift left by
         one and conditionally xor the 0x87 reduction constant.  */
      carry = -(tweak_next_hi >> 63) & 0x87;
      tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63);
      tweak_next_lo = (tweak_next_lo << 1) ^ carry;

      burn_depth = crypt_fn (ctx, outbuf, outbuf);

      buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo);
      buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi);

      outbuf += GCRY_XTS_BLOCK_LEN;
      inbuf += GCRY_XTS_BLOCK_LEN;
      nblocks--;
    }

  /* Store the next-block tweak back so a follow-up call continues the
     sequence.  */
  buf_put_le64 (tweak + 0, tweak_next_lo);
  buf_put_le64 (tweak + 8, tweak_next_hi);

  if (burn_depth)
    _gcry_burn_stack (burn_depth + 5 * sizeof(void *));
}


/* Run the self-tests for AES 128.  Returns NULL on success. */
static const char*
selftest_basic_128 (void)
{
  RIJNDAEL_context *ctx;
  unsigned char ctxmem[sizeof(*ctx) + 16];
  unsigned char scratch[16];
  cipher_bulk_ops_t bulk_ops;

  /* The test vectors are from the AES supplied ones; more or less
     randomly taken from ecb_tbl.txt (I=42,81,14) */
#if 1
  static const unsigned char plaintext_128[16] =
    {
      0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33,
      0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A
    };
  static const unsigned char key_128[16] =
    {
      0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0,
      0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA
    };
  static const unsigned char ciphertext_128[16] =
    {
      0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2,
      0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD
    };
#else
  /* Test vectors from fips-197, appendix C. */
# warning debug test vectors in use
  static const unsigned char plaintext_128[16] =
    {
      0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
      0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
    };
  static const unsigned char key_128[16] =
    {
      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
      0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
      /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */
      /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */
    };
  static const unsigned char ciphertext_128[16] =
    {
      0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
      0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
    };
#endif

  /* Use the first 16-byte aligned address inside CTXMEM.  */
  ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15));

  rijndael_setkey (ctx, key_128, sizeof (key_128), &bulk_ops);
  rijndael_encrypt (ctx, scratch, plaintext_128);
  if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
    {
      return "AES-128 test encryption failed.";
    }
  rijndael_decrypt (ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
    return "AES-128 test decryption failed.";

  return NULL;
}

/* Run the self-tests for AES 192.  Returns NULL on success.
*/
static const char*
selftest_basic_192 (void)
{
  /* Known-answer vectors for a single AES-192 ECB block.  */
  static unsigned char plaintext_192[16] =
    {
      0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4,
      0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72
    };
  static unsigned char key_192[24] =
    {
      0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C,
      0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16,
      0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20
    };
  static const unsigned char ciphertext_192[16] =
    {
      0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC,
      0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
    };
  unsigned char ctxmem[sizeof(RIJNDAEL_context) + 16];
  unsigned char scratch[16];
  cipher_bulk_ops_t bulk_ops;
  RIJNDAEL_context *ctx;

  /* Place the context at the first 16-byte aligned address in CTXMEM.  */
  ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15));

  rijndael_setkey (ctx, key_192, sizeof(key_192), &bulk_ops);

  /* Encrypt the known plaintext and compare against the reference.  */
  rijndael_encrypt (ctx, scratch, plaintext_192);
  if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)) != 0)
    return "AES-192 test encryption failed.";

  /* Decrypt in place and check that the plaintext comes back.  */
  rijndael_decrypt (ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)) != 0)
    return "AES-192 test decryption failed.";

  return NULL;
}

/* Run the self-tests for AES 256.  Returns NULL on success.
*/
static const char*
selftest_basic_256 (void)
{
  RIJNDAEL_context *ctx;
  unsigned char ctxmem[sizeof(*ctx) + 16];
  unsigned char scratch[16];
  cipher_bulk_ops_t bulk_ops;

  /* Known-answer vectors for a single AES-256 ECB block.  */
  static unsigned char plaintext_256[16] =
    {
      0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
    };
  static unsigned char key_256[32] =
    {
      0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10,
      0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A,
      0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24,
      0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E
    };
  static const unsigned char ciphertext_256[16] =
    {
      0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71,
      0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
    };

  /* Use the first 16-byte aligned address inside CTXMEM.  */
  ctx = (void *)(ctxmem + ((16 - ((uintptr_t)ctxmem & 15)) & 15));

  rijndael_setkey (ctx, key_256, sizeof(key_256), &bulk_ops);
  rijndael_encrypt (ctx, scratch, plaintext_256);
  if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
    {
      return "AES-256 test encryption failed.";
    }
  rijndael_decrypt (ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
    return "AES-256 test decryption failed.";

  return NULL;
}


/* Run all the self-tests and return NULL on success.  This function
   is used for the on-the-fly self-tests. */
static const char *
selftest (void)
{
  const char *r;

  if ( (r = selftest_basic_128 ())
       || (r = selftest_basic_192 ())
       || (r = selftest_basic_256 ()) )
    return r;

  /* R is NULL here.  */
  return r;
}


/* SP800-38a.pdf for AES-128.
*/
static const char *
selftest_fips_128_38a (int requested_mode)
{
  /* Chained four-block test vectors; only the entry matching
     REQUESTED_MODE is used.  */
  static const struct tv
  {
    int mode;
    const unsigned char key[16];
    const unsigned char iv[16];
    struct
    {
      const unsigned char input[16];
      const unsigned char output[16];
    } data[4];
  } tv[2] =
    {
      {
        GCRY_CIPHER_MODE_CFB,  /* F.3.13, CFB128-AES128 */
        { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
          0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
        {
          { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
              0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
            { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
              0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },

          { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
              0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
            { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f,
              0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } },

          { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
              0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
            { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40,
              0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } },

          { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
              0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
            { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e,
              0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } }
        }
      },
      {
        GCRY_CIPHER_MODE_OFB,
        { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
          0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
        {
          { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
              0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
            { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
              0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },

          { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
              0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
            { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03,
              0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } },

          { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
              0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
            { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6,
              0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } },

          { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
              0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
            { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78,
              0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } },
        }
      }
    };
  unsigned char scratch[16];
  gpg_error_t err;
  int tvi, idx;
  gcry_cipher_hd_t hdenc = NULL;
  gcry_cipher_hd_t hddec = NULL;

  /* Close both handles and bail out returning message A.  */
#define Fail(a) do {           \
    _gcry_cipher_close (hdenc);  \
    _gcry_cipher_close (hddec);  \
    return a;                    \
  } while (0)

  gcry_assert (sizeof tv[0].data[0].input == sizeof scratch);
  gcry_assert (sizeof tv[0].data[0].output == sizeof scratch);

  /* Locate the vector set for the requested mode.  */
  for (tvi=0; tvi < DIM (tv); tvi++)
    if (tv[tvi].mode == requested_mode)
      break;
  if (tvi == DIM (tv))
    Fail ("no test data for this mode");

  err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0);
  if (err)
    Fail ("open");
  err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0);
  if (err)
    Fail ("open");
  err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key);
  if (!err)
    err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key);
  if (err)
    Fail ("set key");
  err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv);
  if (!err)
    err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv);
  if (err)
    Fail ("set IV");

  /* Run all four chained blocks through both directions.  */
  for (idx=0; idx < DIM (tv[tvi].data); idx++)
    {
      err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch,
                                  tv[tvi].data[idx].input,
                                  sizeof tv[tvi].data[idx].input);
      if (err)
        Fail ("encrypt command");
      if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch))
        Fail ("encrypt mismatch");
      err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch,
                                  tv[tvi].data[idx].output,
                                  sizeof tv[tvi].data[idx].output);
      if (err)
        Fail ("decrypt command");
      if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch))
        Fail ("decrypt mismatch");
    }

#undef Fail
  _gcry_cipher_close (hdenc);
  _gcry_cipher_close (hddec);
  return NULL;
}


/* Complete selftest for AES-128 with all modes and driver code. */
static gpg_err_code_t
selftest_fips_128 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  what = "low-level";
  errtxt = selftest_basic_128 ();
  if (errtxt)
    goto failed;

  if (extended)
    {
      what = "cfb";
      errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB);
      if (errtxt)
        goto failed;

      what = "ofb";
      errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB);
      if (errtxt)
        goto failed;
    }

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES128, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}

/* Complete selftest for AES-192.  */
static gpg_err_code_t
selftest_fips_192 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  (void)extended; /* No extended tests available.  */

  what = "low-level";
  errtxt = selftest_basic_192 ();
  if (errtxt)
    goto failed;

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES192, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


/* Complete selftest for AES-256.  */
static gpg_err_code_t
selftest_fips_256 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  (void)extended; /* No extended tests available.  */

  what = "low-level";
  errtxt = selftest_basic_256 ();
  if (errtxt)
    goto failed;

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES256, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


/* Run a full self-test for ALGO and return 0 on success.
*/ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_CIPHER_AES128: ec = selftest_fips_128 (extended, report); break; case GCRY_CIPHER_AES192: ec = selftest_fips_192 (extended, report); break; case GCRY_CIPHER_AES256: ec = selftest_fips_256 (extended, report); break; default: ec = GPG_ERR_CIPHER_ALGO; break; } return ec; } static const char *rijndael_names[] = { "RIJNDAEL", "AES128", "AES-128", NULL }; static const gcry_cipher_oid_spec_t rijndael_oids[] = { { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.6", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.7", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes = { GCRY_CIPHER_AES, {0, 1}, "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael192_names[] = { "RIJNDAEL192", "AES-192", NULL }; static const gcry_cipher_oid_spec_t rijndael192_oids[] = { { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.26", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.27", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes192 = { GCRY_CIPHER_AES192, {0, 1}, "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; static const char *rijndael256_names[] = { "RIJNDAEL256", "AES-256", NULL }; static const gcry_cipher_oid_spec_t rijndael256_oids[] = { { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, { 
"2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, { "2.16.840.1.101.3.4.1.46", GCRY_CIPHER_MODE_GCM }, { "2.16.840.1.101.3.4.1.47", GCRY_CIPHER_MODE_CCM }, { NULL } }; gcry_cipher_spec_t _gcry_cipher_spec_aes256 = { GCRY_CIPHER_AES256, {0, 1}, "AES256", rijndael256_names, rijndael256_oids, 16, 256, sizeof (RIJNDAEL_context), rijndael_setkey, rijndael_encrypt, rijndael_decrypt, NULL, NULL, run_selftests }; diff --git a/configure.ac b/configure.ac index e00c0445..e0c52ec1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,3826 +1,3855 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2021 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ([2.69]) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. 
m4_define([mym4_package],[libgcrypt])
m4_define([mym4_major], [1])
m4_define([mym4_minor], [11])
m4_define([mym4_micro], [0])

# Below is m4 magic to extract and compute the git revision number,
# the decimalized short revision number, a beta version string and a
# flag indicating a development version (mym4_isbeta).  Note that the
# m4 processing is done by autoconf and not during the configure run.
m4_define([mym4_verslist], m4_split(m4_esyscmd([./autogen.sh --find-version] \
                           mym4_package mym4_major mym4_minor mym4_micro),[:]))
m4_define([mym4_isbeta], m4_argn(2, mym4_verslist))
m4_define([mym4_version], m4_argn(4, mym4_verslist))
m4_define([mym4_revision], m4_argn(7, mym4_verslist))
m4_define([mym4_revision_dec], m4_argn(8, mym4_verslist))
m4_esyscmd([echo ]mym4_version[>VERSION])
AC_INIT([mym4_package],[mym4_version],[https://bugs.gnupg.org])

# LT Version numbers, remember to change them just *before* a release.
#   NOTE NOTE - Already updated for a 1.11 series - NOTE NOTE
#   (Code changed: REVISION++)
#   (Interfaces added/removed/changed: CURRENT++, REVISION=0)
#   (Interfaces added: AGE++)
#   (Interfaces removed: AGE=0)
#
# (Interfaces removed:    CURRENT++, AGE=0, REVISION=0)
# (Interfaces added:      CURRENT++, AGE++, REVISION=0)
# (No interfaces changed:                   REVISION++)
LIBGCRYPT_LT_CURRENT=25
LIBGCRYPT_LT_AGE=5
LIBGCRYPT_LT_REVISION=0
################################################

AC_SUBST(LIBGCRYPT_LT_CURRENT)
AC_SUBST(LIBGCRYPT_LT_AGE)
AC_SUBST(LIBGCRYPT_LT_REVISION)

# If the API is changed in an incompatible way: increment the next counter.
#
# 1.6: ABI and API change but the change is to most users irrelevant
#      and thus the API version number has not been incremented.
LIBGCRYPT_CONFIG_API_VERSION=1

# If you change the required gpg-error version, please remove
# unnecessary error code defines in src/gcrypt-int.h.
NEED_GPG_ERROR_VERSION=1.27 AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_USE_SYSTEM_EXTENSIONS AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* Add .note.gnu.property section for Intel CET in assembler sources when CET is enabled. */ #if defined(__ASSEMBLER__) && defined(__CET__) # include #endif /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. */ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) ###################### ## Basic checks. ### (we need some results later on (e.g. 
$GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_SEARCH_LIBS([strerror],[cposix]) AC_PROG_INSTALL AC_PROG_AWK # Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE dnl Under Linux, make sure that the old dtags are used if LD_LIBRARY_PATH dnl is defined. The issue is that with the new dtags, LD_LIBRARY_PATH has dnl the precedence over the run path, so that if a compatible MPFR library dnl is installed in some directory from $LD_LIBRARY_PATH, then the tested dnl MPFR library will be this library instead of the MPFR library from the dnl build tree. Other OS with the same issue might be added later. dnl dnl References: dnl https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=859732 dnl http://lists.gnu.org/archive/html/libtool/2017-05/msg00000.html dnl dnl We need to check whether --disable-new-dtags is supported as alternate dnl linkers may be used (e.g., with tcc: CC=tcc LD=tcc). dnl case $host in *-*-linux*) if test -n "$LD_LIBRARY_PATH"; then saved_LDFLAGS="$LDFLAGS" LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags" LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE" AC_MSG_CHECKING(whether --disable-new-dtags is supported by the linker) AC_LINK_IFELSE([AC_LANG_SOURCE([[ int main (void) { return 0; } ]])], [AC_MSG_RESULT(yes (use it since LD_LIBRARY_PATH is set))], [AC_MSG_RESULT(no) LDADD_FOR_TESTS_KLUDGE="" ]) LDFLAGS="$saved_LDFLAGS" fi ;; esac AC_SUBST([LDADD_FOR_TESTS_KLUDGE]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_major \ mym4_minor mym4_micro) AC_SUBST(VERSION_NUMBER) # We need to compile and run a program on the build machine. 
AX_CC_FOR_BUILD LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" available_ciphers="$available_ciphers sm4 aria" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. available_random_modules="getentropy linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. 
OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 1, Expose all libc features (__DARWIN_C_FULL).) AC_DEFINE(USE_POSIX_SPAWN_FOR_TESTS, 1, [defined if we use posix_spawn in test program]) AC_CHECK_HEADERS(spawn.h) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. 
case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AS_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(unsigned __int128, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. ***]]) fi # If not specified otherwise, all available algorithms will be # included. 
default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AS_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. AC_ARG_ENABLE(pubkey-ciphers, AS_HELP_STRING([--enable-pubkey-ciphers=ciphers], [select the public-key ciphers to include]), [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_pubkey_ciphers=""]) if test "x$enabled_pubkey_ciphers" = "x" \ -o "$enabled_pubkey_ciphers" = "yes" \ -o "$enabled_pubkey_ciphers" = "no"; then enabled_pubkey_ciphers=$default_pubkey_ciphers fi AC_MSG_CHECKING([which public-key ciphers to include]) for cipher in $enabled_pubkey_ciphers; do LIST_MEMBER($cipher, $available_pubkey_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported public-key cipher specified]) fi done AC_MSG_RESULT([$enabled_pubkey_ciphers]) # Implementation of the --enable-digests switch. 
AC_ARG_ENABLE(digests,
	      AS_HELP_STRING([--enable-digests=digests],
			     [select the message digests to include]),
	      [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
	      [enabled_digests=""])
# An empty value, plain --enable-digests ("yes") or --disable-digests ("no")
# all fall back to the default set.
if test "x$enabled_digests" = "x" \
   -o "$enabled_digests" = "yes"  \
   -o "$enabled_digests" = "no"; then
   enabled_digests=$default_digests
fi
AC_MSG_CHECKING([which message digests to include])
# LIST_MEMBER is expected to set $found ("0" = not in the available list).
for digest in $enabled_digests; do
    LIST_MEMBER($digest, $available_digests)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported message digest specified])
    fi
done
AC_MSG_RESULT([$enabled_digests])

# Implementation of the --enable-kdfs switch.
# FIX: the help string used to read "--enable-kfds=kdfs"; the option
# actually registered by AC_ARG_ENABLE is --enable-kdfs, so the help
# text was misleading users of ./configure --help.
AC_ARG_ENABLE(kdfs,
      AS_HELP_STRING([--enable-kdfs=kdfs],
		     [select the KDFs to include]),
      [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
      [enabled_kdfs=""])
if test "x$enabled_kdfs" = "x" \
   -o "$enabled_kdfs" = "yes"  \
   -o "$enabled_kdfs" = "no"; then
   enabled_kdfs=$default_kdfs
fi
AC_MSG_CHECKING([which key derivation functions to include])
for kdf in $enabled_kdfs; do
    LIST_MEMBER($kdf, $available_kdfs)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported key derivation function specified])
    fi
done
AC_MSG_RESULT([$enabled_kdfs])

# Implementation of the --enable-random switch.
AC_ARG_ENABLE(random,
	      AS_HELP_STRING([--enable-random=name],
			     [select which random number generator to use]),
	      [random=`echo $enableval | tr '[A-Z]' '[a-z]'`],
	      [])
# No selection, "yes" or "no" means: use the default RNG.
if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then
    random=default
fi
AC_MSG_CHECKING([which random module to use])
# "default" and "auto" are resolved later; only explicit module names
# are validated against the available list here.
if test "$random" != "default" -a "$random" != "auto"; then
    LIST_MEMBER($random, $available_random_modules)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported random module specified])
    fi
fi
AC_MSG_RESULT($random)

# Implementation of the --disable-dev-random switch.
AC_MSG_CHECKING([whether use of /dev/random is requested])
AC_ARG_ENABLE(dev-random,
[  --disable-dev-random    disable the use of dev random],
    try_dev_random=$enableval, try_dev_random=yes)
AC_MSG_RESULT($try_dev_random)

# Implementation of the --with-egd-socket switch.
# FIX: the help string had a stray closing parenthesis
# ("... for the EGD socket)") which showed up verbatim in
# ./configure --help output.
AC_ARG_WITH(egd-socket,
    [  --with-egd-socket=NAME  Use NAME for the EGD socket],
            egd_socket_name="$withval", egd_socket_name="" )
AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name",
                   [Define if you don't want the default EGD socket name.
                     For details see cipher/rndegd.c])

# Implementation of --disable-asm.
AC_MSG_CHECKING([whether MPI and cipher assembler modules are requested])
AC_ARG_ENABLE([asm],
              AS_HELP_STRING([--disable-asm],
                             [Disable MPI and cipher assembler modules]),
              [try_asm_modules=$enableval],
              [try_asm_modules=yes])
AC_MSG_RESULT($try_asm_modules)
if test "$try_asm_modules" != yes ; then
    AC_DEFINE(ASM_DISABLED,1,[Defined if --disable-asm was used to configure])
fi

# Implementation of the --enable-large-data-tests switch.
# FIX: typo "ruinning" -> "running" in the user-visible help string.
AC_MSG_CHECKING([whether to run large data tests])
AC_ARG_ENABLE(large-data-tests,
              AS_HELP_STRING([--enable-large-data-tests],
                 [Enable the real long running large data tests]),
              large_data_tests=$enableval,large_data_tests=no)
AC_MSG_RESULT($large_data_tests)
AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests)

# Implementation of --enable-force-soft-hwfeatures
AC_MSG_CHECKING([whether 'soft' HW feature bits are forced on])
AC_ARG_ENABLE([force-soft-hwfeatures],
              AS_HELP_STRING([--enable-force-soft-hwfeatures],
                             [Enable forcing 'soft' HW feature bits on]),
              [force_soft_hwfeatures=$enableval],
              [force_soft_hwfeatures=no])
AC_MSG_RESULT($force_soft_hwfeatures)

# Implementation of the --with-capabilities switch.
# Check whether we want to use Linux capabilities
AC_MSG_CHECKING([whether use of capabilities is requested])
AC_ARG_WITH(capabilities,
            AS_HELP_STRING([--with-capabilities],
                           [Use linux capabilities [default=no]]),
            [use_capabilities="$withval"],[use_capabilities=no])
AC_MSG_RESULT($use_capabilities)

# Implementation of the --enable-hmac-binary-check.
AC_MSG_CHECKING([whether a HMAC binary check is requested])
AC_ARG_ENABLE(hmac-binary-check,
              AS_HELP_STRING([--enable-hmac-binary-check],
                             [Enable library integrity check]),
              [use_hmac_binary_check="$enableval"],
              [use_hmac_binary_check=no])
AC_MSG_RESULT($use_hmac_binary_check)
if test "$use_hmac_binary_check" = no ; then
    DEF_HMAC_BINARY_CHECK=''
else
    AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1,
              [Define to support an HMAC based integrity check])
    # objcopy/readelf are needed by the build rules that embed the check.
    AC_CHECK_TOOL(OBJCOPY, [objcopy])
    AC_CHECK_TOOL(READELF, [readelf])
    # Any value other than plain "yes" is passed through as the key for
    # the binary check (via -DKEY_FOR_BINARY_CHECK).
    if test "$use_hmac_binary_check" != yes ; then
        DEF_HMAC_BINARY_CHECK=-DKEY_FOR_BINARY_CHECK="'\"$use_hmac_binary_check\"'"
    fi
fi
AM_CONDITIONAL(USE_HMAC_BINARY_CHECK, test "x$use_hmac_binary_check" != xno)
AC_SUBST(DEF_HMAC_BINARY_CHECK)

# Implementation of the --with-fips-module-version.
AC_ARG_WITH(fips-module-version,
            AS_HELP_STRING([--with-fips-module-version=VERSION],
                           [Specify the FIPS module version for the build]),
            fips_module_version="$withval", fips_module_version="" )
AC_DEFINE_UNQUOTED(FIPS_MODULE_VERSION, "$fips_module_version",
                   [Define FIPS module version for certification])

# Implementation of the --disable-jent-support switch.
AC_MSG_CHECKING([whether jitter entropy support is requested])
AC_ARG_ENABLE(jent-support,
              AS_HELP_STRING([--disable-jent-support],
                             [Disable support for the Jitter entropy collector]),
              jentsupport=$enableval,jentsupport=yes)
AC_MSG_RESULT($jentsupport)

# Implementation of the --disable-padlock-support switch.
AC_MSG_CHECKING([whether padlock support is requested]) AC_ARG_ENABLE(padlock-support, AS_HELP_STRING([--disable-padlock-support], [Disable support for the PadLock Engine of VIA processors]), padlocksupport=$enableval,padlocksupport=yes) AC_MSG_RESULT($padlocksupport) # Implementation of the --disable-aesni-support switch. AC_MSG_CHECKING([whether AESNI support is requested]) AC_ARG_ENABLE(aesni-support, AS_HELP_STRING([--disable-aesni-support], [Disable support for the Intel AES-NI instructions]), aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) # Implementation of the --disable-shaext-support switch. AC_MSG_CHECKING([whether SHAEXT support is requested]) AC_ARG_ENABLE(shaext-support, AS_HELP_STRING([--disable-shaext-support], [Disable support for the Intel SHAEXT instructions]), shaextsupport=$enableval,shaextsupport=yes) AC_MSG_RESULT($shaextsupport) # Implementation of the --disable-pclmul-support switch. AC_MSG_CHECKING([whether PCLMUL support is requested]) AC_ARG_ENABLE(pclmul-support, AS_HELP_STRING([--disable-pclmul-support], [Disable support for the Intel PCLMUL instructions]), pclmulsupport=$enableval,pclmulsupport=yes) AC_MSG_RESULT($pclmulsupport) # Implementation of the --disable-sse41-support switch. AC_MSG_CHECKING([whether SSE4.1 support is requested]) AC_ARG_ENABLE(sse41-support, AS_HELP_STRING([--disable-sse41-support], [Disable support for the Intel SSE4.1 instructions]), sse41support=$enableval,sse41support=yes) AC_MSG_RESULT($sse41support) # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, AS_HELP_STRING([--disable-drng-support], [Disable support for the Intel DRNG (RDRAND instruction)]), drngsupport=$enableval,drngsupport=yes) AC_MSG_RESULT($drngsupport) # Implementation of the --disable-avx-support switch. 
AC_MSG_CHECKING([whether AVX support is requested]) AC_ARG_ENABLE(avx-support, AS_HELP_STRING([--disable-avx-support], [Disable support for the Intel AVX instructions]), avxsupport=$enableval,avxsupport=yes) AC_MSG_RESULT($avxsupport) # Implementation of the --disable-avx2-support switch. AC_MSG_CHECKING([whether AVX2 support is requested]) AC_ARG_ENABLE(avx2-support, AS_HELP_STRING([--disable-avx2-support], [Disable support for the Intel AVX2 instructions]), avx2support=$enableval,avx2support=yes) AC_MSG_RESULT($avx2support) # Implementation of the --disable-avx512-support switch. AC_MSG_CHECKING([whether AVX512 support is requested]) AC_ARG_ENABLE(avx512-support, AS_HELP_STRING([--disable-avx512-support], [Disable support for the Intel AVX512 instructions]), avx512support=$enableval,avx512support=yes) AC_MSG_RESULT($avx512support) # Implementation of the --disable-gfni-support switch. AC_MSG_CHECKING([whether GFNI support is requested]) AC_ARG_ENABLE(gfni-support, AS_HELP_STRING([--disable-gfni-support], [Disable support for the Intel GFNI instructions]), gfnisupport=$enableval,gfnisupport=yes) AC_MSG_RESULT($gfnisupport) # Implementation of the --disable-neon-support switch. AC_MSG_CHECKING([whether NEON support is requested]) AC_ARG_ENABLE(neon-support, AS_HELP_STRING([--disable-neon-support], [Disable support for the ARM NEON instructions]), neonsupport=$enableval,neonsupport=yes) AC_MSG_RESULT($neonsupport) # Implementation of the --disable-arm-crypto-support switch. AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested]) AC_ARG_ENABLE(arm-crypto-support, AS_HELP_STRING([--disable-arm-crypto-support], [Disable support for the ARMv8 Crypto Extension instructions]), armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) # Implementation of the --disable-sve-support switch. 
AC_MSG_CHECKING([whether SVE support is requested]) AC_ARG_ENABLE(sve-support, AS_HELP_STRING([--disable-sve-support], [Disable support for the ARMv8 SVE instructions]), svesupport=$enableval,svesupport=yes) AC_MSG_RESULT($svesupport) # Implementation of the --disable-sve2-support switch. AC_MSG_CHECKING([whether SVE2 support is requested]) AC_ARG_ENABLE(sve2-support, AS_HELP_STRING([--disable-sve2-support], [Disable support for the ARMv9 SVE2 instructions]), sve2support=$enableval,sve2support=yes) AC_MSG_RESULT($sve2support) # Implementation of the --disable-ppc-crypto-support switch. AC_MSG_CHECKING([whether PPC crypto support is requested]) AC_ARG_ENABLE(ppc-crypto-support, AS_HELP_STRING([--disable-ppc-crypto-support], [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), ppccryptosupport=$enableval,ppccryptosupport=yes) AC_MSG_RESULT($ppccryptosupport) # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], AS_HELP_STRING([--disable-O-flag-munging], [Disable modification of the cc -O flag]), [enable_o_flag_munging=$enableval], [enable_o_flag_munging=yes]) AC_MSG_RESULT($enable_o_flag_munging) AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes") # Implementation of the --disable-instrumentation-munging switch. AC_MSG_CHECKING([whether a instrumentation (-fprofile, -fsanitize) munging is requested]) AC_ARG_ENABLE([instrumentation-munging], AS_HELP_STRING([--disable-instrumentation-munging], [Disable modification of the cc instrumentation options]), [enable_instrumentation_munging=$enableval], [enable_instrumentation_munging=yes]) AC_MSG_RESULT($enable_instrumentation_munging) AM_CONDITIONAL(ENABLE_INSTRUMENTATION_MUNGING, test "$enable_instrumentation_munging" = "yes") # Implementation of the --disable-amd64-as-feature-detection switch. 
AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection]) AC_ARG_ENABLE(amd64-as-feature-detection, AS_HELP_STRING([--disable-amd64-as-feature-detection], [Disable the auto-detection of AMD64 as(1) features]), amd64_as_feature_detection=$enableval, amd64_as_feature_detection=yes) AC_MSG_RESULT($amd64_as_feature_detection) AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME", [A human readable text with the name of the OS]) # For some systems we know that we have ld_version scripts. # Use it then as default. have_ld_version_script=no case "${host}" in *-*-linux*) have_ld_version_script=yes ;; *-*-gnu*) have_ld_version_script=yes ;; esac AC_ARG_ENABLE([ld-version-script], AS_HELP_STRING([--enable-ld-version-script], [enable/disable use of linker version script. (default is system dependent)]), [have_ld_version_script=$enableval], [ : ] ) AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes") AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM", [defined to the name of the strong random device]) AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM", [defined to the name of the weaker random device]) ############################### #### Checks for libraries. #### ############################### # # gpg-error is required. # AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION") if test "x$GPG_ERROR_LIBS" = "x"; then AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .]) fi AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT, [The default error source for libgcrypt.]) AM_CONDITIONAL(USE_GPGRT_CONFIG, [test -n "$GPGRT_CONFIG" \ -a "$ac_cv_path_GPG_ERROR_CONFIG" = no]) # # Check whether pthreads is available # if test "$have_w32_system" != yes; then AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes) if test "$have_pthread" = yes; then AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.]) fi fi # Solaris needs -lsocket and -lnsl. 
Unisys system includes # gethostbyname in libsocket but needs libnsl for socket. AC_SEARCH_LIBS(setsockopt, [socket], , [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])]) AC_SEARCH_LIBS(setsockopt, [nsl]) ################################## #### Checks for header files. #### ################################## AC_CHECK_HEADERS(unistd.h sys/auxv.h sys/random.h sys/sysctl.h) ########################################## #### Checks for typedefs, structures, #### #### and compiler characteristics. #### ########################################## AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_PID_T AC_CHECK_TYPES([byte, ushort, u16, u32, u64]) # # Check for __builtin_bswap32 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap32, [gcry_cv_have_builtin_bswap32], [gcry_cv_have_builtin_bswap32=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int x = 0; int y = __builtin_bswap32(x); return y;])], [gcry_cv_have_builtin_bswap32=yes])]) if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP32,1, [Defined if compiler has '__builtin_bswap32' intrinsic]) fi # # Check for __builtin_bswap64 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap64, [gcry_cv_have_builtin_bswap64], [gcry_cv_have_builtin_bswap64=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [long long x = 0; long long y = __builtin_bswap64(x); return y;])], [gcry_cv_have_builtin_bswap64=yes])]) if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP64,1, [Defined if compiler has '__builtin_bswap64' intrinsic]) fi # # Check for __builtin_ctz intrinsic. # AC_CACHE_CHECK(for __builtin_ctz, [gcry_cv_have_builtin_ctz], [gcry_cv_have_builtin_ctz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_ctz(x); return y;])], [gcry_cv_have_builtin_ctz=yes])]) if test "$gcry_cv_have_builtin_ctz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZ, 1, [Defined if compiler has '__builtin_ctz' intrinsic]) fi # # Check for __builtin_ctzl intrinsic. 
# AC_CACHE_CHECK(for __builtin_ctzl, [gcry_cv_have_builtin_ctzl], [gcry_cv_have_builtin_ctzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_ctzl(x); return y;])], [gcry_cv_have_builtin_ctzl=yes])]) if test "$gcry_cv_have_builtin_ctzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZL, 1, [Defined if compiler has '__builtin_ctzl' intrinsic]) fi # # Check for __builtin_clz intrinsic. # AC_CACHE_CHECK(for __builtin_clz, [gcry_cv_have_builtin_clz], [gcry_cv_have_builtin_clz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_clz(x); return y;])], [gcry_cv_have_builtin_clz=yes])]) if test "$gcry_cv_have_builtin_clz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZ, 1, [Defined if compiler has '__builtin_clz' intrinsic]) fi # # Check for __builtin_clzl intrinsic. # AC_CACHE_CHECK(for __builtin_clzl, [gcry_cv_have_builtin_clzl], [gcry_cv_have_builtin_clzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_clzl(x); return y;])], [gcry_cv_have_builtin_clzl=yes])]) if test "$gcry_cv_have_builtin_clzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZL, 1, [Defined if compiler has '__builtin_clzl' intrinsic]) fi # # Check for __sync_synchronize intrinsic. # AC_CACHE_CHECK(for __sync_synchronize, [gcry_cv_have_sync_synchronize], [gcry_cv_have_sync_synchronize=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [__sync_synchronize(); return 0;])], [gcry_cv_have_sync_synchronize=yes])]) if test "$gcry_cv_have_sync_synchronize" = "yes" ; then AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, [Defined if compiler has '__sync_synchronize' intrinsic]) fi # # Check for VLA support (variable length arrays). # AC_CACHE_CHECK(whether the variable length arrays are supported, [gcry_cv_have_vla], [gcry_cv_have_vla=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void f1(char *, int); char foo(int i) { char b[(i < 0 ? 
0 : i) + 1]; f1(b, sizeof b); return b[0];}]])], [gcry_cv_have_vla=yes])]) if test "$gcry_cv_have_vla" = "yes" ; then AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported]) fi # # Check for ELF visibility support. # AC_CACHE_CHECK(whether the visibility attribute is supported, gcry_cv_visibility_attribute, [gcry_cv_visibility_attribute=no AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo __attribute__ ((visibility ("hidden"))) = 1; int bar __attribute__ ((visibility ("protected"))) = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then gcry_cv_visibility_attribute=yes fi fi fi ]) if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken visibility attribute, gcry_cv_broken_visibility_attribute, [gcry_cv_broken_visibility_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo (int x); int bar (int x) __asm__ ("foo") __attribute__ ((visibility ("hidden"))); int bar (int x) { return x; } ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1; then gcry_cv_broken_visibility_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken alias attribute, gcry_cv_broken_alias_attribute, [gcry_cv_broken_alias_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[extern int foo (int x) __asm ("xyzzy"); int bar (int x) { return x; } extern __typeof (bar) foo __attribute ((weak, alias ("bar"))); extern int dfoo; extern __typeof (dfoo) dfoo __asm ("abccb"); int dfoo = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \ grep 'abccb' conftest.s >/dev/null 2>&1; then gcry_cv_broken_alias_attribute=no fi fi ]) fi if test 
"$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(if gcc supports -fvisibility=hidden, gcry_cv_gcc_has_f_visibility, [gcry_cv_gcc_has_f_visibility=no _gcc_cflags_save=$CFLAGS CFLAGS="-fvisibility=hidden" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])], gcry_cv_gcc_has_f_visibility=yes) CFLAGS=$_gcc_cflags_save; ]) fi if test "$gcry_cv_visibility_attribute" = "yes" \ && test "$gcry_cv_broken_visibility_attribute" != "yes" \ && test "$gcry_cv_broken_alias_attribute" != "yes" \ && test "$gcry_cv_gcc_has_f_visibility" = "yes" then AC_DEFINE(GCRY_USE_VISIBILITY, 1, [Define to use the GNU C visibility attribute.]) CFLAGS="$CFLAGS -fvisibility=hidden" fi # Following attribute tests depend on warnings to cause compile to fail, # so set -Werror temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether the compiler supports the GCC style aligned attribute # AC_CACHE_CHECK([whether the GCC style aligned attribute is supported], [gcry_cv_gcc_attribute_aligned], [gcry_cv_gcc_attribute_aligned=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct { int a; } foo __attribute__ ((aligned (16)));]])], [gcry_cv_gcc_attribute_aligned=yes])]) if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1, [Defined if a GCC style "__attribute__ ((aligned (n))" is supported]) fi # # Check whether the compiler supports the GCC style packed attribute # AC_CACHE_CHECK([whether the GCC style packed attribute is supported], [gcry_cv_gcc_attribute_packed], [gcry_cv_gcc_attribute_packed=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct foolong_s { long b; } __attribute__ ((packed)); struct foo_s { char a; struct foolong_s b; } __attribute__ ((packed)); enum bar { FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))), };]])], [gcry_cv_gcc_attribute_packed=yes])]) if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, [Defined if a GCC style "__attribute__ ((packed))" is supported]) fi # # Check 
whether the compiler supports the GCC style may_alias attribute # AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported], [gcry_cv_gcc_attribute_may_alias], [gcry_cv_gcc_attribute_may_alias=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[typedef struct foo_s { int a; } __attribute__ ((may_alias)) foo_t;]])], [gcry_cv_gcc_attribute_may_alias=yes])]) if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1, [Defined if a GCC style "__attribute__ ((may_alias))" is supported]) fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # AC_CACHE_CHECK([whether 'asm' assembler keyword is supported], [gcry_cv_have_asm], [gcry_cv_have_asm=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm("":::"memory"); }]])], [gcry_cv_have_asm=yes])]) AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported], [gcry_cv_have___asm__], [gcry_cv_have___asm__=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("":::"memory"); }]])], [gcry_cv_have___asm__=yes])]) if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_DEFINE(asm,__asm__, [Define to supported assembler block keyword, if plain 'asm' was not supported]) fi fi # # Check whether the compiler supports inline assembly memory barrier. 
# if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { __asm__ volatile("":::"memory"); __asm__ volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { asm volatile("":::"memory"); asm volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1, [Define if inline asm memory barrier is supported]) fi # # Check whether GCC assembler supports features needed for our ARM # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], [gcry_cv_gcc_arm_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_arm_platform_as_ok="n/a" else gcry_cv_gcc_arm_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if assembler supports UAL syntax. */ ".syntax unified\n\t" ".arm\n\t" /* our assembly code is in ARM mode */ ".text\n\t" /* Following causes error if assembler ignored '.syntax unified'. */ "asmfunc:\n\t" "add r0, r0, r4, ror #12;\n\t" /* Test if '.type' and '.size' are supported. 
*/ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,%function;\n\t" ); void asmfunc(void);]], [ asmfunc(); ] )], [gcry_cv_gcc_arm_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARM assembly implementations]) fi # # Check whether GCC assembler supports features needed for our ARMv8/Aarch64 # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations], [gcry_cv_gcc_aarch64_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_aarch64_platform_as_ok="n/a" else gcry_cv_gcc_aarch64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "asmfunc:\n\t" "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" ); void asmfunc(void);]], [ asmfunc(); ] )], [gcry_cv_gcc_aarch64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations]) fi # # Check whether GCC assembler supports for CFI directives. 
# AC_CACHE_CHECK([whether GCC assembler supports for CFI directives], [gcry_cv_gcc_asm_cfi_directives], [gcry_cv_gcc_asm_cfi_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "ac_test:\n\t" ".cfi_startproc\n\t" ".cfi_remember_state\n\t" ".cfi_adjust_cfa_offset 8\n\t" ".cfi_rel_offset 0, 8\n\t" ".cfi_def_cfa_register 1\n\t" ".cfi_register 2, 3\n\t" ".cfi_restore 2\n\t" ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t" ".cfi_restore_state\n\t" ".long 0\n\t" ".cfi_endproc\n\t" ); void asmfunc(void)]])], [gcry_cv_gcc_asm_cfi_directives=yes])]) if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_CFI_DIRECTIVES,1, [Defined if underlying assembler supports for CFI directives]) fi # # Check whether GCC assembler supports for ELF directives. # AC_CACHE_CHECK([whether GCC assembler supports for ELF directives], [gcry_cv_gcc_asm_elf_directives], [gcry_cv_gcc_asm_elf_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if ELF directives '.type' and '.size' are supported. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,STT_FUNC;\n\t" );]])], [gcry_cv_gcc_asm_elf_directives=yes])]) if test "$gcry_cv_gcc_asm_elf_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_ELF_DIRECTIVES,1, [Defined if underlying assembler supports for ELF directives]) fi # # Check whether underscores in symbols are required. This needs to be # done before setting up the assembler stuff. # GNUPG_SYS_SYMBOL_UNDERSCORE() ################################# #### #### #### Setup assembler stuff. #### #### Define mpi_cpu_arch. #### #### #### ################################# AC_ARG_ENABLE(mpi-path, AS_HELP_STRING([--enable-mpi-path=EXTRA_PATH], [prepend EXTRA_PATH to list of CPU specific optimizations]), mpi_extra_path="$enableval",mpi_extra_path="") AC_MSG_CHECKING(architecture and mpi assembler functions) if test -f $srcdir/mpi/config.links ; then . 
$srcdir/mpi/config.links AC_CONFIG_LINKS("$mpi_ln_list") ac_cv_mpi_sflags="$mpi_sflags" AC_MSG_RESULT($mpi_cpu_arch) else AC_MSG_RESULT(failed) AC_MSG_ERROR([mpi/config.links missing!]) fi MPI_SFLAGS="$ac_cv_mpi_sflags" AC_SUBST(MPI_SFLAGS) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) # Reset non applicable feature flags. 
if test "$mpi_cpu_arch" != "x86" ; then aesnisupport="n/a" shaextsupport="n/a" pclmulsupport="n/a" sse41support="n/a" avxsupport="n/a" avx2support="n/a" avx512support="n/a" gfnisupport="n/a" padlocksupport="n/a" drngsupport="n/a" fi if test "$mpi_cpu_arch" != "arm" ; then if test "$mpi_cpu_arch" != "aarch64" ; then neonsupport="n/a" armcryptosupport="n/a" svesupport="n/a" sve2support="n/a" fi fi if test "$mpi_cpu_arch" != "ppc"; then ppccryptosupport="n/a" fi ############################################# #### #### #### Platform specific compiler checks. #### #### #### ############################################# # Following tests depend on warnings to cause compile to fail, so set -Werror # temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether compiler supports 'optimize' function attribute # AC_CACHE_CHECK([whether compiler supports 'optimize' function attribute], [gcry_cv_gcc_attribute_optimize], [gcry_cv_gcc_attribute_optimize=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((optimize("-O2"))) fn(int i){return i;}]])], [gcry_cv_gcc_attribute_optimize=yes])]) if test "$gcry_cv_gcc_attribute_optimize" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_OPTIMIZE,1, [Defined if compiler supports "__attribute__ ((optimize))" function attribute]) fi # # Check whether compiler supports 'ms_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute], [gcry_cv_gcc_attribute_ms_abi], [gcry_cv_gcc_attribute_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((ms_abi)) proto(int);]])], [gcry_cv_gcc_attribute_ms_abi=yes])]) if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1, [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute]) fi # # Check whether compiler supports 'sysv_abi' function attribute. 
# AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute], [gcry_cv_gcc_attribute_sysv_abi], [gcry_cv_gcc_attribute_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((sysv_abi)) proto(int);]])], [gcry_cv_gcc_attribute_sysv_abi=yes])]) if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1, [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute]) fi # # Check whether default calling convention is 'ms_abi'. # if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], [gcry_cv_gcc_default_abi_is_ms_abi], [gcry_cv_gcc_default_abi_is_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((ms_abi))(*msabi_func)(void); /* warning on SysV abi targets, passes on Windows based targets */ msabi_func = def_func; return msabi_func; }]])], [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, [Defined if default calling convention is 'ms_abi']) fi fi # # Check whether default calling convention is 'sysv_abi'. # if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], [gcry_cv_gcc_default_abi_is_sysv_abi], [gcry_cv_gcc_default_abi_is_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((sysv_abi))(*sysvabi_func)(void); /* warning on MS ABI targets, passes on SysV ABI targets */ sysvabi_func = def_func; return sysvabi_func; }]])], [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, [Defined if default calling convention is 'sysv_abi']) fi fi # Restore flags. 
CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions], [gcry_cv_gcc_inline_asm_ssse3], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_ssse3="n/a" else gcry_cv_gcc_inline_asm_ssse3=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; void a(void) { __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_ssse3=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1, [Defined if inline assembler supports SSSE3 instructions]) fi # # Check whether GCC inline assembler supports PCLMUL instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], [gcry_cv_gcc_inline_asm_pclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_pclmul="n/a" else gcry_cv_gcc_inline_asm_pclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_pclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, [Defined if inline assembler supports PCLMUL instructions]) fi # # Check whether GCC inline assembler supports SHA Extensions instructions. 
# AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions], [gcry_cv_gcc_inline_asm_shaext], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_shaext="n/a" else gcry_cv_gcc_inline_asm_shaext=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_shaext=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1, [Defined if inline assembler supports SHA Extensions instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); 
}]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports AVX512 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX512 instructions], [gcry_cv_gcc_inline_asm_avx512], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx512="n/a" else gcry_cv_gcc_inline_asm_avx512=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpopcntq %%zmm7, %%zmm1%{%%k1%}%{z%};\n\t":::"cc"); __asm__("vpexpandb %%zmm3, %%zmm1;\n\t":::"cc"); __asm__("vpxorq %%xmm7, %%xmm7, %%xmm7;\n\t":::"cc"); __asm__("vpxorq %%ymm7, %%ymm7, %%ymm7;\n\t":::"cc"); __asm__("vpxorq (%%eax)%{1to8%}, %%zmm7, %%zmm7;\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx512=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx512" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX512,1, [Defined if inline assembler supports AVX512 instructions]) fi # # Check whether GCC inline assembler supports VAES and VPCLMUL instructions # AC_CACHE_CHECK([whether GCC inline assembler supports VAES and VPCLMUL instructions], [gcry_cv_gcc_inline_asm_vaes_vpclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" 
!= "yes" ; then gcry_cv_gcc_inline_asm_vaes_vpclmul="n/a" else gcry_cv_gcc_inline_asm_vaes_vpclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("vaesenclast %%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vaesenclast %%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ __asm__("vpclmulqdq \$0,%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vpclmulqdq \$0,%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_vaes_vpclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_vaes_vpclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL,1, [Defined if inline assembler supports VAES and VPCLMUL instructions]) fi # # Check whether GCC inline assembler supports GFNI instructions # AC_CACHE_CHECK([whether GCC inline assembler supports GFNI instructions], [gcry_cv_gcc_inline_asm_gfni], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_gfni="n/a" else gcry_cv_gcc_inline_asm_gfni=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("gf2p8affineqb \$123, %%xmm0, %%xmm0;\n\t":::"cc"); /* SSE */ __asm__("vgf2p8affineinvqb \$234, %%ymm1, %%ymm1, %%ymm1;\n\t":::"cc"); /* AVX */ __asm__("vgf2p8mulb (%%eax), %%zmm2, %%zmm2;\n\t":::"cc"); /* AVX512 */ }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_gfni=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_gfni" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_GFNI,1, [Defined if inline assembler supports GFNI instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else gcry_cv_gcc_inline_asm_bmi2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[unsigned int a(unsigned int x, unsigned int y) { unsigned int tmp1, tmp2; asm ("rorxl %2, %1, %0" : "=r" (tmp1) : "rm0" (x), "J" (32 - ((23) & 31))); asm ("andnl %2, 
%1, %0" : "=r" (tmp2) : "r0" (x), "rm" (y)); return tmp1 + tmp2; }]], [ a(1, 2); ] )], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether compiler supports x86/AVX512 intrinsics # _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -mavx512f" AC_CACHE_CHECK([whether compiler supports x86/AVX512 intrinsics], [gcry_cv_cc_x86_avx512_intrinsics], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_x86_avx512_intrinsics="n/a" else gcry_cv_cc_x86_avx512_intrinsics=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include __m512i fn(void *in, __m128i y) { __m512i x; x = _mm512_loadu_epi32 (in); /* check the GCC bug 90980. */ x = _mm512_maskz_loadu_epi32(_cvtu32_mask16(0xfff0), in) ^ _mm512_castsi128_si512(y); asm volatile ("vinserti32x4 \$3, %0, %%zmm6, %%zmm6;\n\t" "vpxord %%zmm6, %%zmm6, %%zmm6" ::"x"(y),"r"(in):"memory","xmm6"); return x; } ]])], [gcry_cv_cc_x86_avx512_intrinsics=yes]) fi]) if test "$gcry_cv_cc_x86_avx512_intrinsics" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_X86_AVX512_INTRINSICS,1, [Defined if underlying compiler supports x86/AVX512 intrinsics]) fi AM_CONDITIONAL(ENABLE_X86_AVX512_INTRINSICS_EXTRA_CFLAGS, test "$gcry_cv_cc_x86_avx512_intrinsics" = "yes") # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t"); void fn(void);]], [fn();])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to CPPFLAGS and try check again. 
# _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t"); void fn(void);]], [fn();])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # -# Check whether GCC assembler supports features needed for our amd64 +# Check whether GCC assembler supports features needed for our i386/amd64 # implementations # if test $amd64_as_feature_detection = yes; then - AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], - [gcry_cv_gcc_amd64_platform_as_ok], + AC_CACHE_CHECK([whether GCC assembler is compatible for i386/amd64 assembly implementations], + [gcry_cv_gcc_x86_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then - gcry_cv_gcc_amd64_platform_as_ok="n/a" + gcry_cv_gcc_x86_platform_as_ok="n/a" else - gcry_cv_gcc_amd64_platform_as_ok=no + gcry_cv_gcc_x86_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. 
*/ "xorl \$(123456789/12345678), %ebp;\n\t" ); void asmfunc(void);]], [ asmfunc(); ])], - [gcry_cv_gcc_amd64_platform_as_ok=yes]) + [gcry_cv_gcc_x86_platform_as_ok=yes]) fi]) - if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then - AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, + if test "$gcry_cv_gcc_x86_platform_as_ok" = "yes" && + test "$ac_cv_sizeof_unsigned_long" = "8"; then + AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi - if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && + if test "$gcry_cv_gcc_x86_platform_as_ok" = "yes" && + test "$ac_cv_sizeof_unsigned_long" = "4"; then + AC_DEFINE(HAVE_COMPATIBLE_GCC_I386_PLATFORM_AS,1, + [Defined if underlying assembler is compatible with i386 assembly implementations]) + fi + if test "$gcry_cv_gcc_x86_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" ); void asmfunc(void);]], [ asmfunc(); ])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi + if test "$gcry_cv_gcc_x86_platform_as_ok" = "no" && + test "$ac_cv_sizeof_unsigned_long" = "4"; then + AC_CACHE_CHECK([whether GCC assembler is compatible for WIN32 assembly implementations], + [gcry_cv_gcc_win32_platform_as_ok], + [gcry_cv_gcc_win32_platform_as_ok=no + AC_LINK_IFELSE([AC_LANG_PROGRAM( + [[__asm__( + ".text\n\t" + ".globl _asmfunc\n\t" + "_asmfunc:\n\t" + "xorl \$(1234), %ebp;\n\t" + ); + void asmfunc(void);]], 
[ asmfunc(); ])], + [gcry_cv_gcc_win32_platform_as_ok=yes])]) + if test "$gcry_cv_gcc_win32_platform_as_ok" = "yes" ; then + AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN32_PLATFORM_AS,1, + [Defined if underlying assembler is compatible with WIN32 assembly implementations]) + fi + fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".intel_syntax noprefix\n\t" ".text\n\t" "actest:\n\t" "pxor xmm1, xmm7;\n\t" "vperm2i128 ymm2, ymm3, ymm0, 1;\n\t" "add eax, ebp;\n\t" "rorx eax, ebp, 1;\n\t" "sub eax, [esp + 4];\n\t" "add dword ptr [esp + eax], 0b10101;\n\t" ".att_syntax prefix\n\t" ); void actest(void);]], [ actest(); ])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || 
defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. */ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" ".text\n\t" "testfn:\n\t" "vld1.64 {q0-q1}, [r0]!;\n\t" "vrev64.8 q0, q3;\n\t" "vadd.u64 q0, q1;\n\t" "vadd.s64 d3, d2, d3;\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" ".text\n\t" "testfn:\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, 
q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], [gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd+crypto\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 
v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 SVE instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 SVE instructions], [gcry_cv_gcc_inline_asm_aarch64_sve], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_sve="n/a" else gcry_cv_gcc_inline_asm_aarch64_sve=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd+sve\n\t" ".text\n\t" "testfn:\n\t" "mov x0, \#60;\n\t" "whilelo p0.s, xzr, x0;\n\t" "mov z0.s, p0/z, \#55;\n\t" "ld1b {z0.b}, p0/z, [x1];\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_sve=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_sve" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_SVE,1, [Defined if inline assembler supports AArch64 SVE instructions]) fi # # Check whether GCC inline assembler supports AArch64 SVE2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 SVE2 instructions], [gcry_cv_gcc_inline_asm_aarch64_sve2], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_sve2="n/a" else gcry_cv_gcc_inline_asm_aarch64_sve2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd+sve2\n\t" ".text\n\t" "testfn:\n\t" ";\n\t" "eor3 z0.d, z0.d, z1.d, z2.d;\n\t" "ext z8.b, {z20.b, z21.b}, \#3;\n\t" "adclt z0.d, z1.d, z2.d;\n\t" "tbl z0.b, {z8.b, z9.b}, z1.b;\n\t" "addhnb z16.s, z17.d, z18.d;\n\t" "mov 
z0.s, p0/z, \#55;\n\t" "ld1b {z0.b}, p0/z, [x1];\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_sve2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_sve2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_SVE2,1, [Defined if inline assembler supports AArch64 SVE2 instructions]) fi # # Check whether GCC inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions], [gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4="n/a" else gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".arch armv8.2-a+sha3+sm4\n\t" ".text\n\t" "testfn:\n\t" /* Test for SHA512 instructions */ "sha512h q0, q0, v0.2d;\n\t" "sha512h2 q0, q0, v0.2d;\n\t" "sha512su0 v0.2d, v0.2d;\n\t" "sha512su1 v0.2d, v0.2d, v31.2d;\n\t" /* Test for SHA3 instructions */ "bcax v0.16b, v1.16b, v2.16b, v3.16b;\n\t" "eor3 v0.16b, v1.16b, v2.16b, v3.16b;\n\t" "rax1 v0.2d, v1.2d, v2.2d;\n\t" "xar v0.2d, v1.2d, v2.2d, \#1;\n\t" /* Test for SM3 instructions */ "sm3partw1 v0.4s, v1.4s, v2.4s;\n\t" "sm3partw2 v0.4s, v1.4s, v2.4s;\n\t" "sm3ss1 v0.4s, v1.4s, v2.4s, v3.4s;\n\t" "sm3tt1a v0.4s, v1.4s, v2.s[0];\n\t" "sm3tt1b v0.4s, v1.4s, v2.s[0];\n\t" "sm3tt2a v0.4s, v1.4s, v2.s[0];\n\t" "sm3tt2b v0.4s, v1.4s, v2.s[0];\n\t" /* Test for SM4 instructions */ "sm4e v0.4s, v1.4s;\n\t" "sm4ekey v0.4s, v1.4s, v2.4s;\n\t" ); void testfn(void); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_sha3_sha512_sm3_sm4" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_SHA3_SHA512_SM3_SM4,1, [Defined if inline assembler supports AArch64 SHA3/SHA512/SM3/SM4 instructions]) fi # # Check whether compiler supports AArch64/NEON/crypto intrinsics # 
AC_CACHE_CHECK([whether compiler supports AArch64/NEON/crypto intrinsics], [gcry_cv_cc_aarch64_neon_intrinsics], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_aarch64_neon_intrinsics="n/a" else gcry_cv_cc_aarch64_neon_intrinsics=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include #define __m128i uint64x2_t #define vpsrldq128(s, a, o) \ ({ uint64x2_t __tmp = { 0, 0 }; \ o = (__m128i)vextq_u8((uint8x16_t)a, \ (uint8x16_t)__tmp, (s) & 15); }) #define vaesenclast128(a, b, o) \ (o = (__m128i)vaeseq_u8((uint8x16_t)b, (uint8x16_t)a)) #define memory_barrier_with_vec(a) __asm__("" : "+w"(a) :: "memory") static inline __attribute__((always_inline)) __m128i fn2(__m128i a) { vpsrldq128(2, a, a); return a; } __m128i fn(__m128i in) { __m128i x; memory_barrier_with_vec(in); x = fn2(in); memory_barrier_with_vec(x); vaesenclast128(in, x, in); memory_barrier_with_vec(in); return in; } ]])], [gcry_cv_cc_aarch64_neon_intrinsics=yes]) fi]) if test "$gcry_cv_cc_aarch64_neon_intrinsics" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_AARCH64_NEON_INTRINSICS,1, [Defined if underlying compiler supports AArch64/NEON/crypto intrinsics]) fi _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -O2 -march=armv8-a+crypto" if test "$gcry_cv_cc_aarch64_neon_intrinsics" = "no" && test "$mpi_cpu_arch" = "aarch64" && test "$try_asm_modules" = "yes" ; then AC_CACHE_CHECK([whether compiler supports AArch64/NEON/crypto intrinsics with extra GCC flags], [gcry_cv_cc_aarch64_neon_intrinsics_cflags], [gcry_cv_cc_aarch64_neon_intrinsics_cflags=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include #define __m128i uint64x2_t #define vpsrldq128(s, a, o) \ ({ uint64x2_t __tmp = { 0, 0 }; \ o = (__m128i)vextq_u8((uint8x16_t)a, \ (uint8x16_t)__tmp, (s) & 15); }) #define vaesenclast128(a, b, o) \ (o = (__m128i)vaeseq_u8((uint8x16_t)b, (uint8x16_t)a)) #define memory_barrier_with_vec(a) __asm__("" : "+w"(a) :: "memory") static inline __attribute__((always_inline)) __m128i fn2(__m128i a) { 
vpsrldq128(2, a, a); return a; } __m128i fn(__m128i in) { __m128i x; memory_barrier_with_vec(in); x = fn2(in); memory_barrier_with_vec(x); vaesenclast128(in, x, in); memory_barrier_with_vec(in); return in; } ]])], [gcry_cv_cc_aarch64_neon_intrinsics_cflags=yes])]) if test "$gcry_cv_cc_aarch64_neon_intrinsics_cflags" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_AARCH64_NEON_INTRINSICS,1, [Defined if underlying compiler supports AArch64/NEON/crypto intrinsics]) AC_DEFINE(HAVE_COMPATIBLE_CC_AARCH64_NEON_INTRINSICS_WITH_CFLAGS,1, [Defined if underlying compiler supports AArch64/NEON/crypto intrinsics with extra GCC flags]) fi fi AM_CONDITIONAL(ENABLE_AARCH64_NEON_INTRINSICS_EXTRA_CFLAGS, test "$gcry_cv_cc_aarch64_neon_intrinsics_cflags" = "yes") # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether compiler supports PowerPC AltiVec/VSX intrinsics # AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics], [gcry_cv_cc_ppc_altivec], [if test "$mpi_cpu_arch" != "ppc" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_ppc_altivec="n/a" else gcry_cv_cc_ppc_altivec=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include typedef vector unsigned char block; typedef vector unsigned int vecu32; static inline __attribute__((always_inline)) vecu32 vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx) { return vec_sld (a, b, (4 * idx) & 15); } block fn(block in) { block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); vecu32 y = vec_vsx_ld (0, (unsigned int*)0); y = vec_sld_u32 (y, y, 3); return vec_cipher_be (t, in) ^ (block)y; } ]])], [gcry_cv_cc_ppc_altivec=yes]) fi]) if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) fi _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -O2 -maltivec -mvsx -mcrypto" if test "$gcry_cv_cc_ppc_altivec" = "no" && test "$mpi_cpu_arch" = "ppc" && test "$try_asm_modules" = "yes" ; then AC_CACHE_CHECK([whether 
compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags], [gcry_cv_cc_ppc_altivec_cflags], [gcry_cv_cc_ppc_altivec_cflags=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include typedef vector unsigned char block; typedef vector unsigned int vecu32; static inline __attribute__((always_inline)) vecu32 vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx) { return vec_sld (a, b, (4 * idx) & 15); } block fn(block in) { block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); vecu32 y = vec_vsx_ld (0, (unsigned int*)0); y = vec_sld_u32 (y, y, 3); return vec_cipher_be (t, in) ^ (block)y; } ]])], [gcry_cv_cc_ppc_altivec_cflags=yes])]) if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags]) fi fi AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS, test "$gcry_cv_cc_ppc_altivec_cflags" = "yes") # Restore flags. 
CFLAGS=$_gcc_cflags_save;

#
# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions
#
# Each probe below follows the same protocol: the check is skipped ("n/a")
# unless we are targeting the matching CPU arch with asm modules enabled;
# otherwise a small link test sets the gcry_cv_* cache variable, which then
# drives an AC_DEFINE consumed by the cipher sources.
AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions],
       [gcry_cv_gcc_inline_asm_ppc_altivec],
       [if test "$mpi_cpu_arch" != "ppc" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
        else
          gcry_cv_gcc_inline_asm_ppc_altivec=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[__asm__(".globl testfn;\n"
                    ".text\n\t"
                    "testfn:\n"
                    "stvx %v31,%r12,%r0;\n"
                    "lvx %v20,%r12,%r0;\n"
                    "vcipher %v0, %v1, %v22;\n"
                    "lxvw4x %vs32, %r0, %r1;\n"
                    "vadduwm %v0, %v1, %v22;\n"
                    "vshasigmaw %v0, %v1, 0, 15;\n"
                    "vshasigmad %v0, %v1, 0, 15;\n"
                    "vpmsumd %v11, %v11, %v11;\n"
                  );
            void testfn(void);
            ]], [ testfn(); ] )],
          [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1,
     [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions])
fi

#
# Check whether GCC inline assembler supports PowerISA 3.00 instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions],
       [gcry_cv_gcc_inline_asm_ppc_arch_3_00],
       [if test "$mpi_cpu_arch" != "ppc" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
        else
          gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[__asm__(".text\n\t"
                    ".globl testfn;\n"
                    "testfn:\n"
                    "stxvb16x %r1,%v12,%v30;\n"
                  );
            void testfn(void);
            ]], [ testfn(); ])],
          [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1,
     [Defined if inline assembler supports PowerISA 3.00 instructions])
fi

#
# Check whether compiler supports GCC PowerPC target attributes
#
# Note: this probe (and the clang variant below) intentionally does not test
# $try_asm_modules -- the target attribute is used by C implementations, too.
AC_CACHE_CHECK([whether compiler supports GCC PowerPC target attributes],
       [gcry_cv_gcc_attribute_ppc_target],
       [if test "$mpi_cpu_arch" != "ppc" ; then
          gcry_cv_gcc_attribute_ppc_target="n/a"
        else
          gcry_cv_gcc_attribute_ppc_target=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[void __attribute__((always_inline)) inline aifn(void) {}
            void __attribute__((target("cpu=power8"))) testfn8(void) {aifn();}
            void __attribute__((target("cpu=power9"))) testfn9(void)
            { testfn8(); aifn(); }
            ]], [ testfn9(); aifn(); ])],
          [gcry_cv_gcc_attribute_ppc_target=yes])
        fi])
if test "$gcry_cv_gcc_attribute_ppc_target" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_PPC_TARGET,1,
     [Defined if compiler supports GCC PowerPC target attributes])
fi

#
# Check whether compiler supports clang PowerPC target attributes
#
# clang spells the target attribute "arch=pwrN" instead of GCC's "cpu=powerN".
AC_CACHE_CHECK([whether compiler supports clang PowerPC target attributes],
       [gcry_cv_clang_attribute_ppc_target],
       [if test "$mpi_cpu_arch" != "ppc" ; then
          gcry_cv_clang_attribute_ppc_target="n/a"
        else
          gcry_cv_clang_attribute_ppc_target=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[void __attribute__((always_inline)) inline aifn(void) {}
            void __attribute__((target("arch=pwr8"))) testfn8(void) {aifn();}
            void __attribute__((target("arch=pwr9"))) testfn9(void)
            { testfn8(); aifn(); }
            ]], [ testfn9(); aifn(); ])],
          [gcry_cv_clang_attribute_ppc_target=yes])
        fi])
if test "$gcry_cv_clang_attribute_ppc_target" = "yes" ; then
   AC_DEFINE(HAVE_CLANG_ATTRIBUTE_PPC_TARGET,1,
     [Defined if compiler supports clang PowerPC target attributes])
fi

#
# Check whether GCC inline assembler supports zSeries instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports zSeries instructions],
       [gcry_cv_gcc_inline_asm_s390x],
       [if test "$mpi_cpu_arch" != "s390x" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_s390x="n/a"
        else
          gcry_cv_gcc_inline_asm_s390x=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[typedef unsigned int u128_t __attribute__ ((mode (TI)));
            unsigned int testfunc(unsigned int x, void *y, unsigned int z)
            {
              unsigned long fac[8];
              register unsigned long reg0 asm("0") = 0;
              register unsigned long reg1 asm("1") = x;
              u128_t r1 = ((u128_t)(unsigned long)y << 64) | (unsigned long)z;
              u128_t r2 = 0;
              u128_t r3 = 0;
              asm volatile (".insn rre,0xb92e << 16, %[r1], %[r2]\n\t"
                            : [r1] "+a" (r1), [r2] "+a" (r2)
                            : "r" (reg0), "r" (reg1)
                            : "cc", "memory");
              asm volatile (".insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
                            : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3)
                            : "r" (reg0), "r" (reg1)
                            : "cc", "memory");
              reg0 = 8 - 1;
              asm ("stfle %1\n\t"
                   : "+d" (reg0), "=Q" (fac[0])
                   :
                   : "cc", "memory");
              asm volatile ("mvc 0(16, %0), 0(%1)\n\t"
                            :
                            : "a" (y), "a" (fac)
                            : "memory");
              asm volatile ("xc 0(16, %0), 0(%0)\n\t"
                            :
                            : "a" (fac)
                            : "memory");
              asm volatile ("risbgn %%r11, %%r11, 0, 129, 0\n\t"
                            :
                            :
                            : "memory", "r11");
              asm volatile ("algrk %%r14, %%r14, %%r14\n\t"
                            :
                            :
                            : "memory", "r14");
              return (unsigned int)r1 ^ reg0;
            }
            ]] , [ testfunc(0, 0, 0); ])],
          [gcry_cv_gcc_inline_asm_s390x=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X,1,
     [Defined if inline assembler supports zSeries instructions])
fi

#
# Check whether GCC inline assembler supports zSeries vector instructions
#
# Only attempted when the base zSeries check above succeeded.
AC_CACHE_CHECK([whether GCC inline assembler supports zSeries vector instructions],
       [gcry_cv_gcc_inline_asm_s390x_vx],
       [if test "$mpi_cpu_arch" != "s390x" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_s390x_vx="n/a"
        else
          gcry_cv_gcc_inline_asm_s390x_vx=no
          if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
            AC_LINK_IFELSE([AC_LANG_PROGRAM(
            [[void testfunc(void)
              {
                asm volatile (".machine \"z13+vx\"\n\t"
                              "vx %%v0, %%v1, %%v31\n\t"
                              "verllf %%v11, %%v11, (16)(0)\n\t"
                              :
                              :
                              : "memory");
              }
              ]], [ testfunc(); ])],
            [gcry_cv_gcc_inline_asm_s390x_vx=yes])
          fi
        fi])
if test "$gcry_cv_gcc_inline_asm_s390x_vx" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X_VX,1,
     [Defined if inline assembler supports zSeries vector instructions])
fi


#######################################
#### Checks for library functions.
#### ####################################### AC_FUNC_VPRINTF # We have replacements for these in src/missing-string.c AC_CHECK_FUNCS(stpcpy strcasecmp) # We have replacements for these in src/g10lib.h AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise) # Other checks AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4) AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog) AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile getauxval elf_aux_info) AC_CHECK_FUNCS(explicit_bzero explicit_memset getentropy sysctlbyname) GNUPG_CHECK_MLOCK # # Replacement functions. # AC_REPLACE_FUNCS([getpid clock]) # # Check whether it is necessary to link against libdl. # DL_LIBS="" if test "$use_hmac_binary_check" != no ; then _gcry_save_libs="$LIBS" LIBS="" AC_SEARCH_LIBS(dlopen, c dl,,,) DL_LIBS=$LIBS LIBS="$_gcry_save_libs" fi AC_SUBST(DL_LIBS) # # Check whether we can use Linux capabilities as requested. # if test "$use_capabilities" = "yes" ; then use_capabilities=no AC_CHECK_HEADERS(sys/capability.h) if test "$ac_cv_header_sys_capability_h" = "yes" ; then AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1) if test "$ac_cv_lib_cap_cap_init" = "yes"; then AC_DEFINE(USE_CAPABILITIES,1, [define if capabilities should be used]) LIBS="$LIBS -lcap" use_capabilities=yes fi fi if test "$use_capabilities" = "no" ; then AC_MSG_WARN([[ *** *** The use of capabilities on this system is not possible. *** You need a recent Linux kernel and some patches: *** fcaps-2.2.9-990610.patch (kernel patch for 2.2.9) *** fcap-module-990613.tar.gz (kernel module) *** libcap-1.92.tar.gz (user mode library and utilities) *** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN *** set (filesystems menu). Be warned: This code is *really* ALPHA. ***]]) fi fi # Check whether a random device is available. 
if test "$try_dev_random" = yes ; then AC_CACHE_CHECK(for random device, ac_cv_have_dev_random, [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi]) if test "$ac_cv_have_dev_random" = yes; then AC_DEFINE(HAVE_DEV_RANDOM,1, [defined if the system supports a random device] ) fi else AC_MSG_CHECKING(for random device) ac_cv_have_dev_random=no AC_MSG_RESULT(has been disabled) fi # Figure out the random modules for this configuration. if test "$random" = "default"; then # Select default value. if test "$ac_cv_func_getentropy" = yes; then random_modules="getentropy" elif test "$ac_cv_have_dev_random" = yes; then # Try Linuxish random device. random_modules="linux" else case "${host}" in *-*-mingw32ce*) # WindowsCE random device. random_modules="w32ce" ;; *-*-mingw32*|*-*-cygwin*) # Windows random device. random_modules="w32" ;; *) # Build everything, allow to select at runtime. random_modules="$auto_random_modules" ;; esac fi else if test "$random" = "auto"; then # Build everything, allow to select at runtime. random_modules="$auto_random_modules" else random_modules="$random" fi fi # # Other defines # if test mym4_isgit = "yes"; then AC_DEFINE(IS_DEVELOPMENT_VERSION,1, [Defined if this is not a regular release]) fi AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes) # This is handy for debugging so the compiler doesn't rearrange # things and eliminate variables. AC_ARG_ENABLE(optimization, AS_HELP_STRING([--disable-optimization], [disable compiler optimization]), [if test $enableval = no ; then CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'` fi]) AC_MSG_NOTICE([checking for cc features]) # CFLAGS mangling when using gcc. 
if test "$GCC" = yes; then AC_MSG_CHECKING([if gcc supports -fno-delete-null-pointer-checks]) _gcc_cflags_save=$CFLAGS CFLAGS="-fno-delete-null-pointer-checks" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -fno-delete-null-pointer-checks" fi CFLAGS="$CFLAGS -Wall" if test "$USE_MAINTAINER_MODE" = "yes"; then CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes" CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security" # If -Wno-missing-field-initializers is supported we can enable a # a bunch of really useful warnings. AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wno-missing-field-initializers" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wdeclaration-after-statement" CFLAGS="$CFLAGS -Wno-missing-field-initializers" CFLAGS="$CFLAGS -Wno-sign-compare" fi AC_MSG_CHECKING([if gcc supports -Wpointer-arith]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wpointer-arith" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -Wpointer-arith" fi fi fi # Check whether as(1) supports a noeexecstack feature. This test # includes an override option. CL_AS_NOEXECSTACK AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION) AC_SUBST(LIBGCRYPT_CONFIG_LIBS) AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS) AC_SUBST(LIBGCRYPT_CONFIG_HOST) AC_SUBST(LIBGCRYPT_THREAD_MODULES) AC_CONFIG_COMMANDS([gcrypt-conf],[[ chmod +x src/libgcrypt-config ]],[[ prefix=$prefix exec_prefix=$exec_prefix libdir=$libdir datadir=$datadir DATADIRNAME=$DATADIRNAME ]]) ##################### #### Conclusion. 
####
#####################

# Check that requested feature can actually be used and define
# ENABLE_foo_SUPPORT macros.
#
# Each requested *support flag is downgraded to a descriptive "no (...)"
# string when the corresponding compiler probe above failed; the ARM/SVE
# flags accept either the AArch32 or the AArch64 probe.
if test x"$aesnisupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
    aesnisupport="no (unsupported by compiler)"
  fi
fi
if test x"$shaextsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
    shaextsupport="no (unsupported by compiler)"
  fi
fi
if test x"$pclmulsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
    pclmulsupport="no (unsupported by compiler)"
  fi
fi
if test x"$sse41support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
    sse41support="no (unsupported by compiler)"
  fi
fi
if test x"$avxsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
    avxsupport="no (unsupported by compiler)"
  fi
fi
if test x"$avx2support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
    avx2support="no (unsupported by compiler)"
  fi
fi
if test x"$avx512support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx512" != "yes" ; then
    avx512support="no (unsupported by compiler)"
  fi
fi
if test x"$gfnisupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_gfni" != "yes" ; then
    gfnisupport="no (unsupported by compiler)"
  fi
fi
if test x"$neonsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
      neonsupport="no (unsupported by compiler)"
    fi
  fi
fi
if test x"$armcryptosupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
      armcryptosupport="no (unsupported by compiler)"
    fi
  fi
fi
if test x"$svesupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sve" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_sve" != "yes" ; then
      svesupport="no (unsupported by compiler)"
    fi
  fi
fi
if test x"$sve2support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sve2" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_sve2" != "yes" ; then
      sve2support="no (unsupported by compiler)"
    fi
  fi
fi

# Emit the config.h ENABLE_* symbols for every feature that survived the
# availability checks above.
if test x"$aesnisupport" = xyes ; then
  AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
            [Enable support for Intel AES-NI instructions.])
fi
if test x"$shaextsupport" = xyes ; then
  AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1,
            [Enable support for Intel SHAEXT instructions.])
fi
if test x"$pclmulsupport" = xyes ; then
  AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
            [Enable support for Intel PCLMUL instructions.])
fi
if test x"$sse41support" = xyes ; then
  AC_DEFINE(ENABLE_SSE41_SUPPORT, 1,
            [Enable support for Intel SSE4.1 instructions.])
fi
if test x"$avxsupport" = xyes ; then
  AC_DEFINE(ENABLE_AVX_SUPPORT,1,
            [Enable support for Intel AVX instructions.])
fi
if test x"$avx2support" = xyes ; then
  AC_DEFINE(ENABLE_AVX2_SUPPORT,1,
            [Enable support for Intel AVX2 instructions.])
fi
if test x"$avx512support" = xyes ; then
  AC_DEFINE(ENABLE_AVX512_SUPPORT,1,
            [Enable support for Intel AVX512 instructions.])
fi
if test x"$gfnisupport" = xyes ; then
  AC_DEFINE(ENABLE_GFNI_SUPPORT,1,
            [Enable support for Intel GFNI instructions.])
fi
if test x"$neonsupport" = xyes ; then
  AC_DEFINE(ENABLE_NEON_SUPPORT,1,
            [Enable support for ARM NEON instructions.])
fi
if test x"$armcryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
            [Enable support for ARMv8 Crypto Extension instructions.])
fi
if test x"$svesupport" = xyes ; then
  AC_DEFINE(ENABLE_SVE_SUPPORT,1,
            [Enable support for ARMv8 SVE instructions.])
fi
if test x"$sve2support" = xyes ; then
  AC_DEFINE(ENABLE_SVE2_SUPPORT,1,
            [Enable support for ARMv9 SVE2 instructions.])
fi
if test x"$ppccryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1,
            [Enable support for POWER 8 (PowerISA 2.07) crypto extension.])
fi
if test x"$jentsupport" = xyes ; then
  AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
            [Enable support for the jitter entropy collector.])
fi
if test x"$padlocksupport" = xyes ; then
  AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1,
            [Enable support for the PadLock engine.])
fi
if test x"$drngsupport" = xyes ; then
  AC_DEFINE(ENABLE_DRNG_SUPPORT, 1,
            [Enable support for Intel DRNG (RDRAND instruction).])
fi
if test x"$force_soft_hwfeatures" = xyes ; then
  AC_DEFINE(ENABLE_FORCE_SOFT_HWFEATURES, 1,
            [Enable forcing 'soft' HW feature bits on (for testing).])
fi


# Define conditional sources and config.h symbols depending on the
# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
# LIST_MEMBER sets $found to 1 when its first argument occurs in the list.

LIST_MEMBER(arcfour, $enabled_ciphers)
if test "$found" = "1"; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
   AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS arcfour-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blowfish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
   AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(cast5, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
   AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(des, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
   AC_DEFINE(USE_DES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS des-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(aes, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo"
   AC_DEFINE(USE_AES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-amd64.lo"

         # Build with the SSSE3 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64-asm.lo"

         # Build with the VAES/AVX2 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-arm.lo"

         # Build with the ARMv8/AArch32 CE implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch32-ce.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aarch64.lo"

         # Build with the ARMv8/AArch64 CE implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch64-ce.lo"
      ;;
      powerpc64le-*-*)
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc9le.lo"
         if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" &&
            test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
            # Build with AES-GCM bulk implementation for P10
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-gcm-p10le.lo"
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-p10le.lo"
         fi
      ;;
      powerpc64-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
      ;;
      powerpc-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
      ;;
      s390x-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-s390x.lo"
      ;;
   esac

   case "$mpi_cpu_arch" in
     x86)
         # Build with the AES-NI implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aesni.lo"

         # Build with the Padlock implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-padlock.lo"

         # Build with the VAES/AVX2 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-i386.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-i386.lo"
     ;;
   esac
fi

LIST_MEMBER(twofish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo"
   AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-amd64.lo"

         if test x"$avx2support" = xyes ; then
            # Build with the AVX2 implementation
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-avx2-amd64.lo"
         fi
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-arm.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-aarch64.lo"
      ;;
   esac
fi

LIST_MEMBER(serpent, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo"
   AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the SSE2 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-sse2-amd64.lo"
      ;;
   esac

   if test x"$avx2support" = xyes ; then
      # Build with the AVX2 implementation
      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx2-amd64.lo"
   fi

   if test x"$avx512support" = xyes ; then
      # Build with the AVX512 implementation
      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx512-x86.lo"
   fi

   if test x"$neonsupport" = xyes ; then
      # Build with the NEON implementation
      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-armv7-neon.lo"
   fi
fi
LIST_MEMBER(rfc2268, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo" AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included]) fi LIST_MEMBER(seed, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo" AC_DEFINE(USE_SEED, 1, [Defined if this module should be included]) fi LIST_MEMBER(camellia, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo" AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included]) case "${host}" in arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64-ce.lo" ;; powerpc64le-*-*) # Build with the POWER vector implementations GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-ppc8le.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-ppc9le.lo" ;; esac if test x"$avxsupport" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx-amd64.lo" fi fi if test x"$avx2support" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx2-amd64.lo" # Build with the VAES/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-vaes-avx2-amd64.lo" # Build with the GFNI/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-gfni-avx2-amd64.lo" # Build with the GFNI/AVX512 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-gfni-avx512-amd64.lo" fi fi fi LIST_MEMBER(idea, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo" AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included]) fi 
LIST_MEMBER(salsa20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo" AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-amd64.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-armv7-neon.lo" fi fi LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo" AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included]) fi LIST_MEMBER(chacha20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo" AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-ssse3.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx2.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx512.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-aarch64.lo" ;; powerpc64le-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" # Build with the assembly implementation if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" && test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-p10le-8x.lo" fi ;; powerpc64-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" ;; powerpc-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" ;; s390x-*-*) # Build with the s390x/zSeries vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-s390x.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the 
NEON implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-armv7-neon.lo" fi fi LIST_MEMBER(sm4, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS sm4.lo" AC_DEFINE(USE_SM4, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx2-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-gfni-avx2-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-gfni-avx512-amd64.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aarch64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-armv8-aarch64-ce.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-armv9-aarch64-sve-ce.lo" ;; powerpc64le-*-*) # Build with the ppc64le vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-ppc.lo" ;; esac fi LIST_MEMBER(aria, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS aria.lo" AC_DEFINE(USE_ARIA, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS aria-aesni-avx-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS aria-aesni-avx2-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS aria-gfni-avx512-amd64.lo" ;; esac fi LIST_MEMBER(dsa, $enabled_pubkey_ciphers) AM_CONDITIONAL(USE_DSA, [test "$found" = "1"]) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo" AC_DEFINE(USE_DSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(rsa, $enabled_pubkey_ciphers) AM_CONDITIONAL(USE_RSA, [test "$found" = "1"]) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo" AC_DEFINE(USE_RSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(elgamal, $enabled_pubkey_ciphers) AM_CONDITIONAL(USE_ELGAMAL, [test "$found" 
= "1"]) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo" AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included]) fi LIST_MEMBER(ecc, $enabled_pubkey_ciphers) AM_CONDITIONAL(USE_ECC, [test "$found" = "1"]) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \ ecc-sm2.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi LIST_MEMBER(crc, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo" AC_DEFINE(USE_CRC, 1, [Defined if this module should be included]) case "${host}" in i?86-*-* | x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-intel-pclmul.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-ce.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; powerpc64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; powerpc-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; esac fi LIST_MEMBER(gostr3411-94, $enabled_digests) if test "$found" = "1" ; then # GOST R 34.11-94 internally uses GOST 28147-89 LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo" AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included]) fi fi LIST_MEMBER(stribog, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo" AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included]) fi LIST_MEMBER(md2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo" AC_DEFINE(USE_MD2, 1, [Defined if this module should be included]) fi LIST_MEMBER(md4, $enabled_digests) if test "$found" = "1" ; then 
GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo" AC_DEFINE(USE_MD4, 1, [Defined if this module should be included]) fi LIST_MEMBER(md5, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo" AC_DEFINE(USE_MD5, 1, [Defined if this module should be included]) fi LIST_MEMBER(rmd160, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo" AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included]) fi LIST_MEMBER(sha256, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo" AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" ;; powerpc64-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" ;; powerpc-*-*) # Big-Endian. 
# Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-intel-shaext.lo" ;; esac fi LIST_MEMBER(sha512, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo" AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx2-bmi2-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx512-amd64.lo" ;; i?86-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-i386.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" ;; powerpc64-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" ;; powerpc-*-*) # Big-Endian. 
# Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv7-neon.lo" fi fi LIST_MEMBER(sha3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo" AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-amd64-avx512.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-armv7-neon.lo" fi fi LIST_MEMBER(tiger, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo" AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included]) fi LIST_MEMBER(whirlpool, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo" AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS whirlpool-sse2-amd64.lo" ;; esac fi LIST_MEMBER(blake2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo" AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx2.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx512.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx512.lo" ;; esac fi LIST_MEMBER(sm3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo" AC_DEFINE(USE_SM3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation 
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-aarch64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-armv8-aarch64-ce.lo" ;; esac fi # SHA-1 needs to be included always for example because it is used by # random-csprng.c. GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo" AC_DEFINE(USE_SHA1, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-bmi2-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv7-neon.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-intel-shaext.lo" ;; esac # Arch specific GCM implementations case "${host}" in i?86-*-* | x86_64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-intel-pclmul.lo" ;; arm*-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv7-neon.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch32-ce.lo" ;; aarch64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch64-ce.lo" ;; powerpc64le-*-* | powerpc64-*-* | powerpc-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo" ;; esac # Arch specific MAC implementations case "${host}" in s390x-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-s390x.lo" ;; x86_64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-amd64-avx512.lo" ;; powerpc64le-*-*) # Build with the assembly implementation if test 
"$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" &&
       test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
      # POWER10 little-endian Poly1305 needs both Altivec and ISA 3.00
      # inline-asm support in the compiler.
      GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-p10le.lo"
    fi
  ;;
esac

# KDF modules.
LIST_MEMBER(scrypt, $enabled_kdfs)
if test "$found" = "1" ; then
   GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"
   AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included])
fi

# Random-number gathering modules: each entry in $random_modules maps to one
# rnd*.lo object plus a USE_RND* define.
LIST_MEMBER(getentropy, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndgetentropy.lo"
   AC_DEFINE(USE_RNDGETENTROPY, 1, [Defined if the getentropy RNG should be used.])
fi

LIST_MEMBER(linux, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndoldlinux.lo"
   AC_DEFINE(USE_RNDOLDLINUX, 1, [Defined if the /dev/random RNG should be used.])
fi

LIST_MEMBER(unix, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo"
   AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.])
fi

LIST_MEMBER(egd, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo"
   AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.])
fi

LIST_MEMBER(w32, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo"
   AC_DEFINE(USE_RNDW32, 1, [Defined if the Windows specific RNG should be used.])
fi

LIST_MEMBER(w32ce, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo"
   AC_DEFINE(USE_RNDW32CE, 1, [Defined if the WindowsCE specific RNG should be used.])
fi

# Merge the accumulated assembly objects into the module lists only when
# assembly modules were not disabled.
if test "$try_asm_modules" = yes ; then
  # Build with assembly implementations
  GCRYPT_CIPHERS="$GCRYPT_CIPHERS $GCRYPT_ASM_CIPHERS"
  GCRYPT_DIGESTS="$GCRYPT_DIGESTS $GCRYPT_ASM_DIGESTS"
fi

# Export the final module lists to the Makefiles (cipher/Makefile.am and
# random/Makefile.am reference these as @GCRYPT_*@).
AC_SUBST([GCRYPT_CIPHERS])
AC_SUBST([GCRYPT_PUBKEY_CIPHERS])
AC_SUBST([GCRYPT_DIGESTS])
AC_SUBST([GCRYPT_KDFS])
AC_SUBST([GCRYPT_RANDOM])

AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers)
AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers)
AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests)

# For printing the
# configuration we need a colon separated list of
# algorithm names.
tmp=`echo "$enabled_ciphers" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp", [List of available cipher algorithms])
tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp", [List of available public key cipher algorithms])
tmp=`echo "$enabled_digests" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp", [List of available digest algorithms])
tmp=`echo "$enabled_kdfs" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp", [List of available KDF algorithms])

#
# Define conditional sources depending on the used hardware platform.
# Note that all possible modules must also be listed in
# src/Makefile.am (EXTRA_libgcrypt_la_SOURCES).
#
GCRYPT_HWF_MODULES=
case "$mpi_cpu_arch" in
  x86)
    AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms])
    GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo"
  ;;
  alpha)
    # No hardware-feature detection module for this architecture.
    AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms])
  ;;
  sparc)
    AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms])
  ;;
  mips)
    AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms])
  ;;
  m68k)
    AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms])
  ;;
  ppc)
    AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms])
    GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo"
  ;;
  arm)
    AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms])
    GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
  ;;
  aarch64)
    # Note: AArch64 deliberately reuses HAVE_CPU_ARCH_ARM and the ARM
    # detection module.
    AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms])
    GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
  ;;
  s390x)
    AC_DEFINE(HAVE_CPU_ARCH_S390X, 1, [Defined for s390x/zSeries platforms])
    GCRYPT_HWF_MODULES="libgcrypt_la-hwf-s390x.lo"
  ;;
esac
AC_SUBST([GCRYPT_HWF_MODULES])

#
# Option to disable building of doc file
#
build_doc=yes
AC_ARG_ENABLE([doc], AS_HELP_STRING([--disable-doc],
                                    [do not build the documentation]),
                     build_doc=$enableval, build_doc=yes)
AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno])

#
# Provide
# information about the build.
#
# mym4_revision is presumably an m4 macro expanded to the git revision —
# confirm against the full configure.ac preamble.
BUILD_REVISION="mym4_revision"
AC_SUBST(BUILD_REVISION)
AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION",
                   [GIT commit id revision used to build this package])

# Temporarily disable m4 quoting so the bracketed sed expression is passed
# through literally.
changequote(,)dnl
BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'`
changequote([,])dnl
BUILD_VERSION="${BUILD_VERSION}mym4_revision_dec"
# Windows-style file version: dots become commas.
BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,`
AC_SUBST(BUILD_VERSION)
AC_SUBST(BUILD_FILEVERSION)

# --enable-build-timestamp: "yes" means "use the current UTC time"; any other
# value is taken verbatim; default is an empty timestamp (reproducible builds).
AC_ARG_ENABLE([build-timestamp],
  AS_HELP_STRING([--enable-build-timestamp],
                 [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]),
     [if test "$enableval" = "yes"; then
        BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
      else
        BUILD_TIMESTAMP="$enableval"
      fi],
     [BUILD_TIMESTAMP=""])
AC_SUBST(BUILD_TIMESTAMP)
AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP",
                   [The time this package was configured for a build])

# And create the files.
AC_CONFIG_FILES([
Makefile
m4/Makefile
compat/Makefile
mpi/Makefile
cipher/Makefile
random/Makefile
doc/Makefile
src/Makefile
src/gcrypt.h
src/libgcrypt-config
src/libgcrypt.pc
src/versioninfo.rc
tests/Makefile
])
# Generated test scripts need the execute bit set after config.status
# writes them.
AC_CONFIG_FILES([tests/hashtest-6g], [chmod +x tests/hashtest-6g])
AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g])
AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf])
AC_OUTPUT

# Strip the ".lo" suffix for display; show "none" when no detection module
# was selected.
detection_module="${GCRYPT_HWF_MODULES%.lo}"
test -n "$detection_module" || detection_module="none"

# Give some feedback
GCRY_MSG_SHOW([],[])
GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:])
GCRY_MSG_SHOW([],[])
GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)])
GCRY_MSG_SHOW([Hardware detection module:],[$detection_module])
GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers])
GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests])
GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs])
GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers])
GCRY_MSG_SHOW([Random number generator: ],[$random])
GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport])
GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities])
GCRY_MSG_SHOW([FIPS module version: ],[$fips_module_version])
GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport])
GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport])
GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport])
GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport])
GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support])
GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport])
GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport])
GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support])
GCRY_MSG_SHOW([Try using Intel AVX512: ],[$avx512support])
GCRY_MSG_SHOW([Try using Intel GFNI: ],[$gfnisupport])
GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport])
GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport])
GCRY_MSG_SHOW([Try using ARMv8 SVE: ],[$svesupport])
GCRY_MSG_SHOW([Try using ARMv9 SVE2: ],[$sve2support])
GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport])
GCRY_MSG_SHOW([],[])

if test "x${gpg_config_script_warn}" != x; then
cat <