diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 250b229e..6230d52e 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,278 +1,280 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box$(EXEEXT_FOR_BUILD) DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c \ cipher-cfb.c \ cipher-ofb.c \ cipher-ctr.c \ cipher-aeswrap.c \ cipher-ccm.c \ cipher-cmac.c \ cipher-gcm.c \ cipher-poly1305.c \ cipher-ocb.c \ cipher-xts.c \ cipher-eax.c \ cipher-siv.c \ cipher-gcm-siv.c \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ bithelp.h \ bufhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ asm-common-aarch64.h \ asm-common-amd64.h \ asm-common-s390x.h \ asm-inline-s390x.h \ asm-poly1305-aarch64.h \ asm-poly1305-amd64.h \ asm-poly1305-s390x.h \ arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ chacha20-amd64-avx512.S chacha20-armv7-neon.S chacha20-aarch64.S \ chacha20-ppc.c chacha20-s390x.S \ + chacha20-p10le-8x.s \ cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ crc.c crc-intel-pclmul.c crc-armv8-ce.c \ crc-armv8-aarch64-ce.S \ crc-ppc.c \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ poly1305-s390x.S poly1305-amd64-avx512.S \ + poly1305-p10le.s \ rijndael.c rijndael-internal.h rijndael-tables.h \ rijndael-aesni.c rijndael-padlock.c \ rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-vaes.c rijndael-vaes-avx2-amd64.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ rijndael-ppc.c rijndael-ppc9le.c \ rijndael-p10le.c rijndael-gcm-p10le.s \ rijndael-ppc-common.h rijndael-ppc-functions.h \ rijndael-s390x.c \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S \ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S sm4-aarch64.S \ sm4-armv8-aarch64-ce.S sm4-gfni-avx2-amd64.S \ serpent-avx2-amd64.S serpent-armv7-neon.S \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha256-intel-shaext.c sha256-ppc.c \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \ sha512-avx2-bmi2-amd64.S sha512-avx512-amd64.S \ sha512-armv7-neon.S sha512-arm.S \ sha512-ppc.c sha512-ssse3-i386.c \ sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S sm3-armv8-aarch64-ce.S \ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.h \ camellia-gfni-avx2-amd64.S camellia-gfni-avx512-amd64.S \ camellia-vaes-avx2-amd64.S camellia-aesni-avx2-amd64.S \ camellia-arm.S camellia-aarch64.S \ blake2.c \ blake2b-amd64-avx2.S blake2s-amd64-avx.S gost28147.lo: gost-sb.h gost-sb.h: gost-s-box$(EXEEXT_FOR_BUILD) ./gost-s-box$(EXEEXT_FOR_BUILD) $@ gost-s-box$(EXEEXT_FOR_BUILD): gost-s-box.c $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. tiger.o: $(srcdir)/tiger.c Makefile `echo $(COMPILE) -c $< | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c Makefile `echo $(LTCOMPILE) -c $< | $(o_flag_munging) ` # We need to disable instrumentation for these modules as they use cc as # thin assembly front-end and do not tolerate in-between function calls # inserted by compiler as those functions may clobber the XMM registers. if ENABLE_INSTRUMENTATION_MUNGING instrumentation_munging = sed \ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' else instrumentation_munging = cat endif rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS ppc_vcrypto_cflags = -O2 -maltivec -mvsx -mcrypto else ppc_vcrypto_cflags = endif rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.o: $(srcdir)/rijndael-p10le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.lo: $(srcdir)/rijndael-p10le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.o: $(srcdir)/crc-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.o: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.lo: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` diff --git a/cipher/chacha20-p10le-8x.s b/cipher/chacha20-p10le-8x.s new file mode 100644 index 00000000..427c6310 --- /dev/null +++ b/cipher/chacha20-p10le-8x.s @@ -0,0 +1,864 @@ +# Copyright 2021- IBM Inc. All rights reserved +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see . +# +#=================================================================================== +# Written by Danny Tsen +# +# This function handles multiple 64-byte block data length +# and the length should be more than 512 bytes. +# +# unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len); +# +# r1 - top of the stack +# r3 to r10 input parameters +# r3 - out +# r4 - inp +# r5 - len +# r6 - key[8] +# r7 - counter[4] +# +# do rounds, 8 quarter rounds +# 1. a += b; d ^= a; d <<<= 16; +# 2. c += d; b ^= c; b <<<= 12; +# 3. a += b; d ^= a; d <<<= 8; +# 4. c += d; b ^= c; b <<<= 7 +# +# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 16 +# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 12 +# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 8 +# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 7 +# +# 4 blocks (a b c d) +# +# a0 b0 c0 d0 +# a1 b1 c1 d1 +# ... +# a4 b4 c4 d4 +# ... +# a8 b8 c8 d8 +# ... +# a12 b12 c12 d12 +# a13 ... +# a14 ... +# a15 b15 c15 d15 +# +# Column round (v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) +# Diagnal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) +# +.text + +.macro QT_loop_8x + # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) + xxlor 0, 32+25, 32+25 + xxlor 32+25, 20, 20 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vadduwm 16, 16, 20 + vadduwm 17, 17, 21 + vadduwm 18, 18, 22 + vadduwm 19, 19, 23 + + vpermxor 12, 12, 0, 25 + vpermxor 13, 13, 1, 25 + vpermxor 14, 14, 2, 25 + vpermxor 15, 15, 3, 25 + vpermxor 28, 28, 16, 25 + vpermxor 29, 29, 17, 25 + vpermxor 30, 30, 18, 25 + vpermxor 31, 31, 19, 25 + xxlor 32+25, 0, 0 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vadduwm 24, 24, 28 + vadduwm 25, 25, 29 + vadduwm 26, 26, 30 + vadduwm 27, 27, 31 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vxor 20, 20, 24 + vxor 21, 21, 25 + vxor 22, 22, 26 + vxor 23, 23, 27 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 21, 21 + vrlw 4, 4, 25 # + vrlw 5, 5, 25 + vrlw 6, 6, 25 + vrlw 7, 7, 25 + vrlw 20, 20, 25 # + vrlw 21, 21, 25 + vrlw 22, 22, 25 + vrlw 23, 23, 25 + xxlor 32+25, 0, 0 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vadduwm 16, 16, 20 + vadduwm 17, 17, 21 + vadduwm 18, 18, 22 + vadduwm 19, 19, 23 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 22, 22 + vpermxor 12, 12, 0, 25 + vpermxor 13, 13, 1, 25 + vpermxor 14, 14, 2, 25 + vpermxor 15, 15, 3, 25 + vpermxor 28, 28, 16, 25 + vpermxor 29, 29, 17, 25 + vpermxor 30, 30, 18, 25 + vpermxor 31, 31, 19, 25 + xxlor 32+25, 0, 0 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vadduwm 24, 24, 28 + vadduwm 25, 25, 29 + vadduwm 26, 26, 30 + vadduwm 27, 27, 31 + xxlor 0, 32+28, 32+28 + xxlor 32+28, 23, 23 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vxor 20, 20, 24 + vxor 21, 21, 25 + vxor 22, 22, 26 + vxor 23, 23, 27 + vrlw 4, 4, 28 # + vrlw 5, 5, 28 + vrlw 6, 6, 28 + vrlw 7, 7, 28 + vrlw 20, 20, 28 # + vrlw 21, 21, 28 + vrlw 22, 22, 28 + vrlw 23, 23, 28 + xxlor 32+28, 0, 0 + + # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) + xxlor 0, 32+25, 32+25 + xxlor 32+25, 20, 20 + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vadduwm 16, 16, 21 + vadduwm 17, 17, 22 + vadduwm 18, 18, 23 + vadduwm 19, 19, 20 + + vpermxor 15, 15, 0, 25 + vpermxor 12, 12, 1, 25 + vpermxor 13, 13, 2, 25 + vpermxor 14, 14, 3, 25 + vpermxor 31, 31, 16, 25 + vpermxor 28, 28, 17, 25 + vpermxor 29, 29, 18, 25 + vpermxor 30, 30, 19, 25 + + xxlor 32+25, 0, 0 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vadduwm 26, 26, 31 + vadduwm 27, 27, 28 + vadduwm 24, 24, 29 + vadduwm 25, 25, 30 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vxor 21, 21, 26 + vxor 22, 22, 27 + vxor 23, 23, 24 + vxor 20, 20, 25 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 21, 21 + vrlw 5, 5, 25 + vrlw 6, 6, 25 + vrlw 7, 7, 25 + vrlw 4, 4, 25 + vrlw 21, 21, 25 + vrlw 22, 22, 25 + vrlw 23, 23, 25 + vrlw 20, 20, 25 + xxlor 32+25, 0, 0 + + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vadduwm 16, 16, 21 + vadduwm 17, 17, 22 + vadduwm 18, 18, 23 + vadduwm 19, 19, 20 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 22, 22 + vpermxor 15, 15, 0, 25 + vpermxor 12, 12, 1, 25 + vpermxor 13, 13, 2, 25 + vpermxor 14, 14, 3, 25 + vpermxor 31, 31, 16, 25 + vpermxor 28, 28, 17, 25 + vpermxor 29, 29, 18, 25 + vpermxor 30, 30, 19, 25 + xxlor 32+25, 0, 0 + + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vadduwm 26, 26, 31 + vadduwm 27, 27, 28 + vadduwm 24, 24, 29 + vadduwm 25, 25, 30 + + xxlor 0, 32+28, 32+28 + xxlor 32+28, 23, 23 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vxor 21, 21, 26 + vxor 22, 22, 27 + vxor 23, 23, 24 + vxor 20, 20, 25 + vrlw 5, 5, 28 + vrlw 6, 6, 28 + vrlw 7, 7, 28 + vrlw 4, 4, 28 + vrlw 21, 21, 28 + vrlw 22, 22, 28 + vrlw 23, 23, 28 + vrlw 20, 20, 28 + xxlor 32+28, 0, 0 +.endm + +.macro QT_loop_4x + # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vpermxor 12, 12, 0, 20 + vpermxor 13, 13, 1, 20 + vpermxor 14, 14, 2, 20 + vpermxor 15, 15, 3, 20 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vrlw 4, 4, 21 + vrlw 5, 5, 21 + vrlw 6, 6, 21 + vrlw 7, 7, 21 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vpermxor 12, 12, 0, 22 + vpermxor 13, 13, 1, 22 + vpermxor 14, 14, 2, 22 + vpermxor 15, 15, 3, 22 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vrlw 4, 4, 23 + vrlw 5, 5, 23 + vrlw 6, 6, 23 + vrlw 7, 7, 23 + + # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vpermxor 15, 15, 0, 20 + vpermxor 12, 12, 1, 20 + vpermxor 13, 13, 2, 20 + vpermxor 14, 14, 3, 20 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vrlw 5, 5, 21 + vrlw 6, 6, 21 + vrlw 7, 7, 21 + vrlw 4, 4, 21 + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vpermxor 15, 15, 0, 22 + vpermxor 12, 12, 1, 22 + vpermxor 13, 13, 2, 22 + vpermxor 14, 14, 3, 22 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vrlw 5, 5, 23 + vrlw 6, 6, 23 + vrlw 7, 7, 23 + vrlw 4, 4, 23 +.endm + +# Transpose +.macro TP_4x a0 a1 a2 a3 + xxmrghw 10, 32+\a0, 32+\a1 # a0, a1, b0, b1 + xxmrghw 11, 32+\a2, 32+\a3 # a2, a3, b2, b3 + xxmrglw 12, 32+\a0, 32+\a1 # c0, c1, d0, d1 + xxmrglw 13, 32+\a2, 32+\a3 # c2, c3, d2, d3 + xxpermdi 32+\a0, 10, 11, 0 # a0, a1, a2, a3 + xxpermdi 32+\a1, 10, 11, 3 # b0, b1, b2, b3 + xxpermdi 32+\a2, 12, 13, 0 # c0, c1, c2, c3 + xxpermdi 32+\a3, 12, 13, 3 # d0, d1, d2, d3 +.endm + +# key stream = working state + state +.macro Add_state S + vadduwm \S+0, \S+0, 16-\S + vadduwm \S+4, \S+4, 17-\S + vadduwm \S+8, \S+8, 18-\S + vadduwm \S+12, \S+12, 19-\S + + vadduwm \S+1, \S+1, 16-\S + vadduwm \S+5, \S+5, 17-\S + vadduwm \S+9, \S+9, 18-\S + vadduwm \S+13, \S+13, 19-\S + + vadduwm \S+2, \S+2, 16-\S + vadduwm \S+6, \S+6, 17-\S + vadduwm \S+10, \S+10, 18-\S + vadduwm \S+14, \S+14, 19-\S + + vadduwm \S+3, \S+3, 16-\S + vadduwm \S+7, \S+7, 17-\S + vadduwm \S+11, \S+11, 18-\S + vadduwm \S+15, \S+15, 19-\S +.endm + +# +# write 256 bytes +# +.macro Write_256 S + add 9, 14, 5 + add 16, 14, 4 + lxvw4x 0, 0, 9 + lxvw4x 1, 17, 9 + lxvw4x 2, 18, 9 + lxvw4x 3, 19, 9 + lxvw4x 4, 20, 9 + lxvw4x 5, 21, 9 + lxvw4x 6, 22, 9 + lxvw4x 7, 23, 9 + lxvw4x 8, 24, 9 + lxvw4x 9, 25, 9 + lxvw4x 10, 26, 9 + lxvw4x 11, 27, 9 + lxvw4x 12, 28, 9 + lxvw4x 13, 29, 9 + lxvw4x 14, 30, 9 + lxvw4x 15, 31, 9 + + xxlxor \S+32, \S+32, 0 + xxlxor \S+36, \S+36, 1 + xxlxor \S+40, \S+40, 2 + xxlxor \S+44, \S+44, 3 + xxlxor \S+33, \S+33, 4 + xxlxor \S+37, \S+37, 5 + xxlxor \S+41, \S+41, 6 + xxlxor \S+45, \S+45, 7 + xxlxor \S+34, \S+34, 8 + xxlxor \S+38, \S+38, 9 + xxlxor \S+42, \S+42, 10 + xxlxor \S+46, \S+46, 11 + xxlxor \S+35, \S+35, 12 + xxlxor \S+39, \S+39, 13 + xxlxor \S+43, \S+43, 14 + xxlxor \S+47, \S+47, 15 + + stxvw4x \S+32, 0, 16 + stxvw4x \S+36, 17, 16 + stxvw4x \S+40, 18, 16 + stxvw4x \S+44, 19, 16 + + stxvw4x \S+33, 20, 16 + stxvw4x \S+37, 21, 16 + stxvw4x \S+41, 22, 16 + stxvw4x \S+45, 23, 16 + + stxvw4x \S+34, 24, 16 + stxvw4x \S+38, 25, 16 + stxvw4x \S+42, 26, 16 + stxvw4x \S+46, 27, 16 + + stxvw4x \S+35, 28, 16 + stxvw4x \S+39, 29, 16 + stxvw4x \S+43, 30, 16 + stxvw4x \S+47, 31, 16 + +.endm + +# +# unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len); +# +.global _gcry_chacha20_p10le_8x +.align 5 +_gcry_chacha20_p10le_8x: + cmpdi 6, 512 + blt Out_no_chacha + + stdu 1,-1024(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + std 22,176(1) + std 23,184(1) + std 24,192(1) + std 25,200(1) + std 26,208(1) + std 27,216(1) + std 28,224(1) + std 29,232(1) + std 30,240(1) + std 31,248(1) + std 0, 1040(1) + + li 17, 16 + li 18, 32 + li 19, 48 + li 20, 64 + li 21, 80 + li 22, 96 + li 23, 112 + li 24, 128 + li 25, 144 + li 26, 160 + li 27, 176 + li 28, 192 + li 29, 208 + li 30, 224 + li 31, 240 + addi 9, 1, 256 + stvx 20, 0, 9 + stvx 21, 17, 9 + stvx 22, 18, 9 + stvx 23, 19, 9 + stvx 24, 20, 9 + stvx 25, 21, 9 + stvx 26, 22, 9 + stvx 27, 23, 9 + stvx 28, 24, 9 + stvx 29, 25, 9 + stvx 30, 26, 9 + stvx 31, 27, 9 + + add 9, 9, 27 + addi 14, 17, 16 + stxvx 14, 14, 9 + addi 14, 14, 16 + stxvx 15, 14, 9 + addi 14, 14, 16 + stxvx 16, 14, 9 + addi 14, 14, 16 + stxvx 17, 14, 9 + addi 14, 14, 16 + stxvx 18, 14, 9 + addi 14, 14, 16 + stxvx 19, 14, 9 + addi 14, 14, 16 + stxvx 20, 14, 9 + addi 14, 14, 16 + stxvx 21, 14, 9 + addi 14, 14, 16 + stxvx 22, 14, 9 + addi 14, 14, 16 + stxvx 23, 14, 9 + addi 14, 14, 16 + stxvx 24, 14, 9 + addi 14, 14, 16 + stxvx 25, 14, 9 + addi 14, 14, 16 + stxvx 26, 14, 9 + addi 14, 14, 16 + stxvx 27, 14, 9 + addi 14, 14, 16 + stxvx 28, 14, 9 + addi 14, 14, 16 + stxvx 29, 14, 9 + addi 14, 14, 16 + stxvx 30, 14, 9 + addi 14, 14, 16 + stxvx 31, 14, 9 + + mr 15, 6 # len + li 14, 0 # offset to inp and outp + + ld 10, sigma@got(2) + + lxvw4x 48, 0, 3 # vr16, constants + lxvw4x 49, 17, 3 # vr17, key 1 + lxvw4x 50, 18, 3 # vr18, key 2 + lxvw4x 51, 19, 3 # vr19, counter, nonce + + lxvw4x 62, 19, 10 # vr30, 4 + + vspltisw 21, 12 + vspltisw 23, 7 + + ld 11, permx@got(2) + lxvw4x 32+20, 0, 11 + lxvw4x 32+22, 17, 11 + + li 8, 10 + mtctr 8 + + xxlor 16, 48, 48 + xxlor 17, 49, 49 + xxlor 18, 50, 50 + xxlor 19, 51, 51 + + vspltisw 25, 4 + vspltisw 26, 8 + + xxlor 16, 48, 48 + xxlor 17, 49, 49 + xxlor 18, 50, 50 + xxlor 19, 51, 51 + + xxlor 25, 32+26, 32+26 + xxlor 24, 32+25, 32+25 + + vadduwm 31, 30, 25 # (0, 1, 2, 3) + (4, 4, 4, 4) + xxlor 30, 32+30, 32+30 + xxlor 31, 32+31, 32+31 + + xxlor 20, 32+20, 32+20 + xxlor 21, 32+21, 32+21 + xxlor 22, 32+22, 32+22 + xxlor 23, 32+23, 32+23 + +Loop_8x: + lvx 0, 20, 10 + lvx 1, 21, 10 + lvx 2, 22, 10 + lvx 3, 23, 10 + xxspltw 32+4, 17, 0 + xxspltw 32+5, 17, 1 + xxspltw 32+6, 17, 2 + xxspltw 32+7, 17, 3 + xxspltw 32+8, 18, 0 + xxspltw 32+9, 18, 1 + xxspltw 32+10, 18, 2 + xxspltw 32+11, 18, 3 + xxspltw 32+12, 19, 0 + xxspltw 32+13, 19, 1 + xxspltw 32+14, 19, 2 + xxspltw 32+15, 19, 3 + vadduwm 12, 12, 30 # increase counter + + lvx 16, 20, 10 + lvx 17, 21, 10 + lvx 18, 22, 10 + lvx 19, 23, 10 + xxspltw 32+20, 17, 0 + xxspltw 32+21, 17, 1 + xxspltw 32+22, 17, 2 + xxspltw 32+23, 17, 3 + xxspltw 32+24, 18, 0 + xxspltw 32+25, 18, 1 + xxspltw 32+26, 18, 2 + xxspltw 32+27, 18, 3 + xxspltw 32+28, 19, 0 + xxspltw 32+29, 19, 1 + vadduwm 28, 28, 31 # increase counter + xxspltw 32+30, 19, 2 + xxspltw 32+31, 19, 3 + +.align 5 +quarter_loop_8x: + QT_loop_8x + + bdnz quarter_loop_8x + + xxlor 0, 32+30, 32+30 + xxlor 32+30, 30, 30 + vadduwm 12, 12, 30 + xxlor 32+30, 0, 0 + TP_4x 0, 1, 2, 3 + TP_4x 4, 5, 6, 7 + TP_4x 8, 9, 10, 11 + TP_4x 12, 13, 14, 15 + + xxlor 0, 48, 48 + xxlor 1, 49, 49 + xxlor 2, 50, 50 + xxlor 3, 51, 51 + xxlor 48, 16, 16 + xxlor 49, 17, 17 + xxlor 50, 18, 18 + xxlor 51, 19, 19 + Add_state 0 + xxlor 48, 0, 0 + xxlor 49, 1, 1 + xxlor 50, 2, 2 + xxlor 51, 3, 3 + Write_256 0 + addi 14, 14, 256 + addi 15, 15, -256 + + xxlor 5, 32+31, 32+31 + xxlor 32+31, 31, 31 + vadduwm 28, 28, 31 + xxlor 32+31, 5, 5 + TP_4x 16+0, 16+1, 16+2, 16+3 + TP_4x 16+4, 16+5, 16+6, 16+7 + TP_4x 16+8, 16+9, 16+10, 16+11 + TP_4x 16+12, 16+13, 16+14, 16+15 + + xxlor 32, 16, 16 + xxlor 33, 17, 17 + xxlor 34, 18, 18 + xxlor 35, 19, 19 + Add_state 16 + Write_256 16 + addi 14, 14, 256 + addi 15, 15, -256 + + # should update counter before out? + xxlor 32+24, 24, 24 + xxlor 32+25, 25, 25 + xxlor 32+30, 30, 30 + vadduwm 30, 30, 25 + vadduwm 31, 30, 24 + xxlor 30, 32+30, 32+30 + xxlor 31, 32+31, 32+31 + + cmpdi 15, 0 + beq Out_loop + + cmpdi 15, 512 + blt Loop_last + + mtctr 8 + b Loop_8x + +Loop_last: + lxvw4x 48, 0, 3 # vr16, constants + lxvw4x 49, 17, 3 # vr17, key 1 + lxvw4x 50, 18, 3 # vr18, key 2 + lxvw4x 51, 19, 3 # vr19, counter, nonce + + vspltisw 21, 12 + vspltisw 23, 7 + lxvw4x 32+20, 0, 11 + lxvw4x 32+22, 17, 11 + + li 8, 10 + mtctr 8 + +Loop_4x: + lvx 0, 20, 10 + lvx 1, 21, 10 + lvx 2, 22, 10 + lvx 3, 23, 10 + vspltw 4, 17, 0 + vspltw 5, 17, 1 + vspltw 6, 17, 2 + vspltw 7, 17, 3 + vspltw 8, 18, 0 + vspltw 9, 18, 1 + vspltw 10, 18, 2 + vspltw 11, 18, 3 + vspltw 12, 19, 0 + vadduwm 12, 12, 30 # increase counter + vspltw 13, 19, 1 + vspltw 14, 19, 2 + vspltw 15, 19, 3 + +.align 5 +quarter_loop: + QT_loop_4x + + bdnz quarter_loop + + vadduwm 12, 12, 30 + TP_4x 0, 1, 2, 3 + TP_4x 4, 5, 6, 7 + TP_4x 8, 9, 10, 11 + TP_4x 12, 13, 14, 15 + + Add_state 0 + Write_256 0 + addi 14, 14, 256 + addi 15, 15, -256 + + # Update state counter + vspltisw 25, 4 + vadduwm 30, 30, 25 + + cmpdi 15, 0 + beq Out_loop + + mtctr 8 + b Loop_4x + +Out_loop: + # + # Update state counter + # + vspltisb 16, -1 # first 16 bytes - 0xffff...ff + vspltisb 17, 0 # second 16 bytes - 0x0000...00 + vsldoi 18, 16, 17, 12 + vand 18, 18, 30 + xxlor 32+19, 19, 19 + vadduwm 18, 19, 18 + stxvw4x 32+18, 19, 3 + li 3, 0 + + addi 9, 1, 256 + lvx 20, 0, 9 + lvx 21, 17, 9 + lvx 22, 18, 9 + lvx 23, 19, 9 + lvx 24, 20, 9 + lvx 25, 21, 9 + lvx 26, 22, 9 + lvx 27, 23, 9 + lvx 28, 24, 9 + lvx 29, 25, 9 + lvx 30, 26, 9 + lvx 31, 27, 9 + + add 9, 9, 27 + addi 14, 17, 16 + lxvx 14, 14, 9 + addi 14, 14, 16 + lxvx 15, 14, 9 + addi 14, 14, 16 + lxvx 16, 14, 9 + addi 14, 14, 16 + lxvx 17, 14, 9 + addi 14, 14, 16 + lxvx 18, 14, 9 + addi 14, 14, 16 + lxvx 19, 14, 9 + addi 14, 14, 16 + lxvx 20, 14, 9 + addi 14, 14, 16 + lxvx 21, 14, 9 + addi 14, 14, 16 + lxvx 22, 14, 9 + addi 14, 14, 16 + lxvx 23, 14, 9 + addi 14, 14, 16 + lxvx 24, 14, 9 + addi 14, 14, 16 + lxvx 25, 14, 9 + addi 14, 14, 16 + lxvx 26, 14, 9 + addi 14, 14, 16 + lxvx 27, 14, 9 + addi 14, 14, 16 + lxvx 28, 14, 9 + addi 14, 14, 16 + lxvx 29, 14, 9 + addi 14, 14, 16 + lxvx 30, 14, 9 + addi 14, 14, 16 + lxvx 31, 14, 9 + + ld 0, 1040(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + ld 22,176(1) + ld 23,184(1) + ld 24,192(1) + ld 25,200(1) + ld 26,208(1) + ld 27,216(1) + ld 28,224(1) + ld 29,232(1) + ld 30,240(1) + ld 31,248(1) + + mtlr 0 + addi 1, 1, 1024 + blr + +Out_no_chacha: + li 3, 0 + blr + +.data +.align 4 +sigma: +.long 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 +.long 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203 +.long 1, 0, 0, 0 +.long 0, 1, 2, 3 +.long 0x61707865, 0x61707865, 0x61707865, 0x61707865 +.long 0x3320646e, 0x3320646e, 0x3320646e, 0x3320646e +.long 0x79622d32, 0x79622d32, 0x79622d32, 0x79622d32 +.long 0x6b206574, 0x6b206574, 0x6b206574, 0x6b206574 +permx: +.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd +.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc diff --git a/cipher/chacha20.c b/cipher/chacha20.c index 8dec4317..3518d95a 100644 --- a/cipher/chacha20.c +++ b/cipher/chacha20.c @@ -1,1360 +1,1398 @@ /* chacha20.c - Bernstein's ChaCha20 cipher * Copyright (C) 2014,2017-2019 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * * For a description of the algorithm, see: * http://cr.yp.to/chacha.html */ /* * Based on D. J. Bernstein reference implementation at * http://cr.yp.to/chacha.html: * * chacha-regs.c version 20080118 * D. J. Bernstein * Public domain. */ #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "cipher-internal.h" #include "bufhelp.h" #define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */ #define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */ #define CHACHA20_BLOCK_SIZE 64 /* Bytes. */ #define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */ #define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */ #define CHACHA20_CTR_SIZE 16 /* Bytes. */ /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ #undef USE_SSSE3 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_AVX2 indicates whether to compile with Intel AVX2 code. */ #undef USE_AVX2 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX2 1 #endif /* USE_AVX512 indicates whether to compile with Intel AVX512 code. */ #undef USE_AVX512 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX512) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX512 1 #endif /* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */ #undef USE_ARMV7_NEON #ifdef ENABLE_NEON_SUPPORT # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_NEON) # define USE_ARMV7_NEON 1 # endif #endif /* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly * code. */ #undef USE_AARCH64_SIMD #ifdef ENABLE_NEON_SUPPORT # if defined(__AARCH64EL__) \ && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) # define USE_AARCH64_SIMD 1 # endif #endif /* USE_PPC_VEC indicates whether to enable PowerPC vector * accelerated code. */ #undef USE_PPC_VEC #ifdef ENABLE_PPC_CRYPTO_SUPPORT # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) # if __GNUC__ >= 4 # define USE_PPC_VEC 1 # endif # endif #endif /* USE_S390X_VX indicates whether to enable zSeries code. */ #undef USE_S390X_VX #if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 # if defined(HAVE_GCC_INLINE_ASM_S390X_VX) # define USE_S390X_VX 1 # endif /* USE_S390X_VX */ #endif /* Assembly implementations use SystemV ABI, ABI conversion and additional * stack to store XMM6-XMM15 needed on Win64. */ #undef ASM_FUNC_ABI #undef ASM_EXTRA_STACK #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) # define ASM_FUNC_ABI __attribute__((sysv_abi)) #else # define ASM_FUNC_ABI #endif typedef struct CHACHA20_context_s { u32 input[16]; unsigned char pad[CHACHA20_BLOCK_SIZE]; unsigned int unused; /* bytes in the pad. */ unsigned int use_ssse3:1; unsigned int use_avx2:1; unsigned int use_avx512:1; unsigned int use_neon:1; unsigned int use_ppc:1; + unsigned int use_p10:1; unsigned int use_s390x:1; } CHACHA20_context_t; #ifdef USE_SSSE3 unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks) ASM_FUNC_ABI; unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks) ASM_FUNC_ABI; unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4( u32 *state, byte *dst, const byte *src, size_t nblks, void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1( u32 *state, byte *dst, const byte *src, size_t nblks, void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; #endif /* USE_SSSE3 */ #ifdef USE_AVX2 unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst, const byte *src, size_t nblks) ASM_FUNC_ABI; unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8( u32 *state, byte *dst, const byte *src, size_t nblks, void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; #endif /* USE_AVX2 */ #ifdef USE_AVX512 unsigned int _gcry_chacha20_amd64_avx512_blocks16(u32 *state, byte *dst, const byte *src, size_t nblks) ASM_FUNC_ABI; #endif /* USE_AVX2 */ #ifdef USE_PPC_VEC +#ifndef WORDS_BIGENDIAN +unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, + const byte *src, + size_t len); +#endif + unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks); unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks); #undef USE_PPC_VEC_POLY1305 #if SIZEOF_UNSIGNED_LONG == 8 #define USE_PPC_VEC_POLY1305 1 unsigned int _gcry_chacha20_poly1305_ppc8_blocks4( u32 *state, byte *dst, const byte *src, size_t nblks, POLY1305_STATE *st, const byte *poly1305_src); #endif /* SIZEOF_UNSIGNED_LONG == 8 */ #endif /* USE_PPC_VEC */ #ifdef USE_S390X_VX unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst, const byte *src, size_t nblks); unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst, const byte *src, size_t nblks); #undef USE_S390X_VX_POLY1305 #if SIZEOF_UNSIGNED_LONG == 8 #define USE_S390X_VX_POLY1305 1 unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8( u32 *state, byte *dst, const byte *src, size_t nblks, POLY1305_STATE *st, const byte *poly1305_src); unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( u32 *state, byte *dst, const byte *src, size_t nblks, POLY1305_STATE *st, const byte *poly1305_src); #endif /* SIZEOF_UNSIGNED_LONG == 8 */ #endif /* USE_S390X_VX */ #ifdef USE_ARMV7_NEON unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks); #endif /* USE_ARMV7_NEON */ #ifdef USE_AARCH64_SIMD unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks); unsigned int _gcry_chacha20_poly1305_aarch64_blocks4( u32 *state, byte *dst, const byte *src, size_t nblks, void *poly1305_state, const byte *poly1305_src); #endif /* USE_AARCH64_SIMD */ static const char *selftest (void); #define ROTATE(v,c) (rol(v,c)) #define XOR(v,w) ((v) ^ (w)) #define PLUS(v,w) ((u32)((v) + (w))) #define PLUSONE(v) (PLUS((v),1)) #define QUARTERROUND(a,b,c,d) \ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); #define BUF_XOR_LE32(dst, src, offset, x) \ buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x)) static unsigned int do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks) { u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; unsigned int i; while (nblks) { x0 = input[0]; x1 = input[1]; x2 = input[2]; x3 = input[3]; x4 = input[4]; x5 = input[5]; x6 = input[6]; x7 = input[7]; x8 = input[8]; x9 = input[9]; x10 = input[10]; x11 = input[11]; x12 = input[12]; x13 = input[13]; x14 = input[14]; x15 = input[15]; for (i = 20; i > 0; i -= 2) { QUARTERROUND(x0, x4, x8, x12) QUARTERROUND(x1, x5, x9, x13) QUARTERROUND(x2, x6, x10, x14) QUARTERROUND(x3, x7, x11, x15) QUARTERROUND(x0, x5, x10, x15) QUARTERROUND(x1, x6, x11, x12) QUARTERROUND(x2, x7, x8, x13) QUARTERROUND(x3, x4, x9, x14) } x0 = PLUS(x0, input[0]); x1 = PLUS(x1, input[1]); x2 = PLUS(x2, input[2]); x3 = PLUS(x3, input[3]); x4 = PLUS(x4, input[4]); x5 = PLUS(x5, input[5]); x6 = PLUS(x6, input[6]); x7 = PLUS(x7, input[7]); x8 = PLUS(x8, input[8]); x9 = PLUS(x9, input[9]); x10 = PLUS(x10, input[10]); x11 = PLUS(x11, input[11]); x12 = PLUS(x12, input[12]); x13 = PLUS(x13, input[13]); x14 = PLUS(x14, input[14]); x15 = PLUS(x15, input[15]); input[12] = PLUSONE(input[12]); input[13] = PLUS(input[13], !input[12]); BUF_XOR_LE32(dst, src, 0, x0); BUF_XOR_LE32(dst, src, 4, x1); BUF_XOR_LE32(dst, src, 8, x2); BUF_XOR_LE32(dst, src, 12, x3); BUF_XOR_LE32(dst, src, 16, x4); BUF_XOR_LE32(dst, src, 20, x5); BUF_XOR_LE32(dst, src, 24, x6); BUF_XOR_LE32(dst, src, 28, x7); BUF_XOR_LE32(dst, src, 32, x8); BUF_XOR_LE32(dst, src, 36, x9); BUF_XOR_LE32(dst, src, 40, x10); BUF_XOR_LE32(dst, src, 44, x11); BUF_XOR_LE32(dst, src, 48, x12); BUF_XOR_LE32(dst, src, 52, x13); BUF_XOR_LE32(dst, src, 56, x14); BUF_XOR_LE32(dst, src, 60, x15); src += CHACHA20_BLOCK_SIZE; dst += CHACHA20_BLOCK_SIZE; nblks--; } /* burn_stack */ return (17 * sizeof(u32) + 6 * sizeof(void *)); } static unsigned int chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src, size_t nblks) { #ifdef USE_SSSE3 if (ctx->use_ssse3) { return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks); } #endif #ifdef USE_PPC_VEC if (ctx->use_ppc) { return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); } #endif #ifdef USE_S390X_VX if (ctx->use_s390x) { return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks); } #endif return do_chacha20_blocks (ctx->input, dst, src, nblks); } static void chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key, unsigned int keylen) { static const char sigma[16] = "expand 32-byte k"; static const char tau[16] = "expand 16-byte k"; const char *constants; ctx->input[4] = buf_get_le32(key + 0); ctx->input[5] = buf_get_le32(key + 4); ctx->input[6] = buf_get_le32(key + 8); ctx->input[7] = buf_get_le32(key + 12); if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */ { key += 16; constants = sigma; } else /* 128 bits */ { constants = tau; } ctx->input[8] = buf_get_le32(key + 0); ctx->input[9] = buf_get_le32(key + 4); ctx->input[10] = buf_get_le32(key + 8); ctx->input[11] = buf_get_le32(key + 12); ctx->input[0] = buf_get_le32(constants + 0); ctx->input[1] = buf_get_le32(constants + 4); ctx->input[2] = buf_get_le32(constants + 8); ctx->input[3] = buf_get_le32(constants + 12); } static void chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen) { if (ivlen == CHACHA20_CTR_SIZE) { ctx->input[12] = buf_get_le32 (iv + 0); ctx->input[13] = buf_get_le32 (iv + 4); ctx->input[14] = buf_get_le32 (iv + 8); ctx->input[15] = buf_get_le32 (iv + 12); } else if (ivlen == CHACHA20_MAX_IV_SIZE) { ctx->input[12] = 0; ctx->input[13] = buf_get_le32 (iv + 0); ctx->input[14] = buf_get_le32 (iv + 4); ctx->input[15] = buf_get_le32 (iv + 8); } else if (ivlen == CHACHA20_MIN_IV_SIZE) { ctx->input[12] = 0; ctx->input[13] = 0; ctx->input[14] = buf_get_le32 (iv + 0); ctx->input[15] = buf_get_le32 (iv + 4); } else { ctx->input[12] = 0; ctx->input[13] = 0; ctx->input[14] = 0; ctx->input[15] = 0; } } static void chacha20_setiv (void *context, const byte *iv, size_t ivlen) { CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. */ if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE && ivlen != CHACHA20_CTR_SIZE) log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen); if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE || ivlen == CHACHA20_CTR_SIZE)) chacha20_ivsetup (ctx, iv, ivlen); else chacha20_ivsetup (ctx, NULL, 0); /* Reset the unused pad bytes counter. */ ctx->unused = 0; } static gcry_err_code_t chacha20_do_setkey (CHACHA20_context_t *ctx, const byte *key, unsigned int keylen) { static int initialized; static const char *selftest_failed; unsigned int features = _gcry_get_hw_features (); if (!initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed); } if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE) return GPG_ERR_INV_KEYLEN; #ifdef USE_SSSE3 ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; #endif #ifdef USE_AVX512 ctx->use_avx512 = (features & HWF_INTEL_AVX512) != 0; #endif #ifdef USE_AVX2 ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0; #endif #ifdef USE_ARMV7_NEON ctx->use_neon = (features & HWF_ARM_NEON) != 0; #endif #ifdef USE_AARCH64_SIMD ctx->use_neon = (features & HWF_ARM_NEON) != 0; #endif #ifdef USE_PPC_VEC ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; +# ifndef WORDS_BIGENDIAN + ctx->use_p10 = (features & HWF_PPC_ARCH_3_10) != 0; +# endif #endif #ifdef USE_S390X_VX ctx->use_s390x = (features & HWF_S390X_VX) != 0; #endif (void)features; chacha20_keysetup (ctx, key, keylen); /* We default to a zero nonce. */ chacha20_setiv (ctx, NULL, 0); return 0; } static gcry_err_code_t chacha20_setkey (void *context, const byte *key, unsigned int keylen, cipher_bulk_ops_t *bulk_ops) { CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen); (void)bulk_ops; _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); return rc; } static unsigned int do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, const byte *inbuf, size_t length) { static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, }; unsigned int nburn, burn = 0; #ifdef USE_AVX512 if (ctx->use_avx512 && length >= CHACHA20_BLOCK_SIZE * 16) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 16; nburn = _gcry_chacha20_amd64_avx512_blocks16(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_AVX2 if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_SSSE3 if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_ARMV7_NEON if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_AARCH64_SIMD if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_PPC_VEC if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; - nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks); +#ifndef WORDS_BIGENDIAN + /* + * A workaround to skip counter overflow. This is rare. + */ + if (ctx->use_p10 && nblocks >= 8 + && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) + { + size_t len = nblocks * CHACHA20_BLOCK_SIZE; + nburn = _gcry_chacha20_p10le_8x(ctx->input, outbuf, inbuf, len); + } + else +#endif + { + nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, + nblocks); + } burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_S390X_VX if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif if (length >= CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } if (length > 0) { nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1); burn = nburn > burn ? nburn : burn; buf_xor (outbuf, inbuf, ctx->pad, length); ctx->unused = CHACHA20_BLOCK_SIZE - length; } if (burn) burn += 5 * sizeof(void *); return burn; } static void chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf, size_t length) { CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; unsigned int nburn, burn = 0; if (!length) return; if (ctx->unused) { unsigned char *p = ctx->pad; size_t n; gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); n = ctx->unused; if (n > length) n = length; buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); length -= n; outbuf += n; inbuf += n; ctx->unused -= n; if (!length) return; gcry_assert (!ctx->unused); } nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length); burn = nburn > burn ? nburn : burn; if (burn) _gcry_burn_stack (burn); } gcry_err_code_t _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, size_t length) { CHACHA20_context_t *ctx = (void *) &c->context.c; unsigned int nburn, burn = 0; byte *authptr = NULL; if (!length) return 0; if (ctx->unused) { unsigned char *p = ctx->pad; size_t n; gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); n = ctx->unused; if (n > length) n = length; buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n); burn = nburn > burn ? nburn : burn; length -= n; outbuf += n; inbuf += n; ctx->unused -= n; if (!length) { if (burn) _gcry_burn_stack (burn); return 0; } gcry_assert (!ctx->unused); } gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); if (0) { } #ifdef USE_AVX512 else if (ctx->use_avx512) { /* Skip stitched chacha20-poly1305 for AVX512. */ authptr = NULL; } #endif #ifdef USE_AVX2 else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) { nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 8 * CHACHA20_BLOCK_SIZE; outbuf += 8 * CHACHA20_BLOCK_SIZE; inbuf += 8 * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_SSSE3 else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) { nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 4 * CHACHA20_BLOCK_SIZE; outbuf += 4 * CHACHA20_BLOCK_SIZE; inbuf += 4 * CHACHA20_BLOCK_SIZE; } else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2) { nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 2 * CHACHA20_BLOCK_SIZE; outbuf += 2 * CHACHA20_BLOCK_SIZE; inbuf += 2 * CHACHA20_BLOCK_SIZE; } else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE) { nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 1 * CHACHA20_BLOCK_SIZE; outbuf += 1 * CHACHA20_BLOCK_SIZE; inbuf += 1 * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_AARCH64_SIMD else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) { nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 4 * CHACHA20_BLOCK_SIZE; outbuf += 4 * CHACHA20_BLOCK_SIZE; inbuf += 4 * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_PPC_VEC_POLY1305 + else if (ctx->use_ppc && ctx->use_p10) + { + /* Skip stitched chacha20-poly1305 for P10. */ + authptr = NULL; + } else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) { nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 4 * CHACHA20_BLOCK_SIZE; outbuf += 4 * CHACHA20_BLOCK_SIZE; inbuf += 4 * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_S390X_VX_POLY1305 else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8) { nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 8 * CHACHA20_BLOCK_SIZE; outbuf += 8 * CHACHA20_BLOCK_SIZE; inbuf += 8 * CHACHA20_BLOCK_SIZE; } else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4) { nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 4); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 4 * CHACHA20_BLOCK_SIZE; outbuf += 4 * CHACHA20_BLOCK_SIZE; inbuf += 4 * CHACHA20_BLOCK_SIZE; } else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2) { nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 2); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 2 * CHACHA20_BLOCK_SIZE; outbuf += 2 * CHACHA20_BLOCK_SIZE; inbuf += 2 * CHACHA20_BLOCK_SIZE; } else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE) { nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 1); burn = nburn > burn ? nburn : burn; authptr = outbuf; length -= 1 * CHACHA20_BLOCK_SIZE; outbuf += 1 * CHACHA20_BLOCK_SIZE; inbuf += 1 * CHACHA20_BLOCK_SIZE; } #endif if (authptr) { size_t authoffset = outbuf - authptr; #ifdef USE_AVX2 if (ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE && authoffset >= 8 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_SSSE3 if (ctx->use_ssse3) { if (length >= 4 * CHACHA20_BLOCK_SIZE && authoffset >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } if (length >= CHACHA20_BLOCK_SIZE && authoffset >= CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } } #endif #ifdef USE_AARCH64_SIMD if (ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE && authoffset >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_aarch64_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_PPC_VEC_POLY1305 if (ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE && authoffset >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_ppc8_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_S390X_VX_POLY1305 if (ctx->use_s390x) { if (length >= 8 * CHACHA20_BLOCK_SIZE && authoffset >= 8 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; burn = _gcry_chacha20_poly1305_s390x_vx_blocks8( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } if (length >= CHACHA20_BLOCK_SIZE && authoffset >= CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; burn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; authptr += nblocks * CHACHA20_BLOCK_SIZE; } } #endif if (authoffset > 0) { _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset); authptr += authoffset; authoffset = 0; } gcry_assert(authptr == outbuf); } while (length) { size_t currlen = length; /* Since checksumming is done after encryption, process input in 24KiB * chunks to keep data loaded in L1 cache for checksumming. However * only do splitting if input is large enough so that last chunks does * not end up being short. */ if (currlen > 32 * 1024) currlen = 24 * 1024; nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); burn = nburn > burn ? nburn : burn; nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, currlen); burn = nburn > burn ? nburn : burn; outbuf += currlen; inbuf += currlen; length -= currlen; } if (burn) _gcry_burn_stack (burn); return 0; } gcry_err_code_t _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, size_t length) { CHACHA20_context_t *ctx = (void *) &c->context.c; unsigned int nburn, burn = 0; int skip_stitched = 0; if (!length) return 0; if (ctx->unused) { unsigned char *p = ctx->pad; size_t n; gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); n = ctx->unused; if (n > length) n = length; nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n); burn = nburn > burn ? nburn : burn; buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); length -= n; outbuf += n; inbuf += n; ctx->unused -= n; if (!length) { if (burn) _gcry_burn_stack (burn); return 0; } gcry_assert (!ctx->unused); } gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); #ifdef USE_AVX512 if (ctx->use_avx512) { /* Skip stitched chacha20-poly1305 for AVX512. */ skip_stitched = 1; } #endif +#ifdef USE_PPC_VEC_POLY1305 + if (ctx->use_ppc && ctx->use_p10) + { + /* Skip stitched chacha20-poly1305 for P10. */ + skip_stitched = 1; + } +#endif #ifdef USE_AVX2 if (!skip_stitched && ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_SSSE3 if (!skip_stitched && ctx->use_ssse3) { if (length >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } if (length >= CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } } #endif #ifdef USE_AARCH64_SIMD if (!skip_stitched && ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_aarch64_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_PPC_VEC_POLY1305 + /* skip stitch for p10 */ if (!skip_stitched && ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; nburn = _gcry_chacha20_poly1305_ppc8_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } #endif #ifdef USE_S390X_VX_POLY1305 if (!skip_stitched && ctx->use_s390x) { if (length >= 8 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 8; nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } if (length >= CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; inbuf += nblocks * CHACHA20_BLOCK_SIZE; } } #endif while (length) { size_t currlen = length; /* Since checksumming is done before decryption, process input in 24KiB * chunks to keep data loaded in L1 cache for decryption. However only * do splitting if input is large enough so that last chunks does not * end up being short. */ if (currlen > 32 * 1024) currlen = 24 * 1024; nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, currlen); burn = nburn > burn ? nburn : burn; nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); burn = nburn > burn ? nburn : burn; outbuf += currlen; inbuf += currlen; length -= currlen; } if (burn) _gcry_burn_stack (burn); return 0; } static const char * selftest (void) { byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; CHACHA20_context_t *ctx; byte scratch[127 + 1]; byte buf[512 + 64 + 4]; int i; /* From draft-strombergson-chacha-test-vectors */ static byte key_1[] = { 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78, 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35, 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb, 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d }; static const byte nonce_1[] = { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 }; static const byte plaintext_1[127] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const byte ciphertext_1[127] = { 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9, 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06, 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00, 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf, 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd, 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f, 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f, 0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92, 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9, 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36, 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1, 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38, 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea, 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0, 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27, 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 }; /* 16-byte alignment required for amd64 implementation. */ ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); scratch[sizeof (scratch) - 1] = 0; chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) return "ChaCha20 encryption test 1 failed."; if (scratch[sizeof (scratch) - 1]) return "ChaCha20 wrote too much."; chacha20_setkey (ctx, key_1, sizeof (key_1), NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) return "ChaCha20 decryption test 1 failed."; for (i = 0; i < sizeof buf; i++) buf[i] = i; chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /*encrypt */ chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); /*decrypt */ chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, buf, buf, 1); chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, buf + (sizeof buf) - 1, 1); for (i = 0; i < sizeof buf; i++) if (buf[i] != (byte) i) return "ChaCha20 encryption test 2 failed."; chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /* encrypt */ for (i = 0; i < sizeof buf; i++) chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); /* decrypt */ chacha20_setkey (ctx, key_1, sizeof key_1, NULL); chacha20_setiv (ctx, nonce_1, sizeof nonce_1); chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); for (i = 0; i < sizeof buf; i++) if (buf[i] != (byte) i) return "ChaCha20 encryption test 3 failed."; return NULL; } gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = { GCRY_CIPHER_CHACHA20, {0, 0}, /* flags */ "CHACHA20", /* name */ NULL, /* aliases */ NULL, /* oids */ 1, /* blocksize in bytes. */ CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */ sizeof (CHACHA20_context_t), chacha20_setkey, NULL, NULL, chacha20_encrypt_stream, chacha20_encrypt_stream, NULL, NULL, chacha20_setiv }; diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h index 9e01df46..26d82103 100644 --- a/cipher/poly1305-internal.h +++ b/cipher/poly1305-internal.h @@ -1,77 +1,92 @@ /* poly1305-internal.h - Poly1305 internals * Copyright (C) 2014 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef G10_POLY1305_INTERNAL_H #define G10_POLY1305_INTERNAL_H #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #define POLY1305_TAGLEN 16 #define POLY1305_KEYLEN 32 #define POLY1305_BLOCKSIZE 16 /* POLY1305_USE_AVX512 indicates whether to compile with Intel AVX512 code. */ #undef POLY1305_USE_AVX512 #if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX512) && \ defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define POLY1305_USE_AVX512 1 #endif +/* POLY1305_USE_PPC_VEC indicates whether to enable PowerPC vector code. */ +#undef POLY1305_USE_PPC_VEC +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ + defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \ + !defined(WORDS_BIGENDIAN) +# if __GNUC__ >= 4 +# define POLY1305_USE_PPC_VEC 1 +# endif +# endif +#endif + typedef struct { u32 k[4]; u32 r[4]; u32 h[5]; } POLY1305_STATE; typedef struct poly1305_context_s { POLY1305_STATE state; byte buffer[POLY1305_BLOCKSIZE]; unsigned int leftover; #ifdef POLY1305_USE_AVX512 unsigned int use_avx512:1; #endif +#ifdef POLY1305_USE_PPC_VEC + unsigned int use_p10:1; +#endif } poly1305_context_t; gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key, size_t keylen); void _gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]); void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf, size_t buflen); unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, size_t bytes); #endif /* G10_POLY1305_INTERNAL_H */ diff --git a/cipher/poly1305-p10le.s b/cipher/poly1305-p10le.s new file mode 100644 index 00000000..4202b41e --- /dev/null +++ b/cipher/poly1305-p10le.s @@ -0,0 +1,841 @@ +# Copyright 2021- IBM Inc. All rights reserved +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see . +# +#=================================================================================== +# Written by Danny Tsen +# +# Poly1305 - this version mainly using vector/VSX/Scalar +# - 26 bits limbs +# - Handle multiple 64 byte blcoks but need at least 2 64 bytes block +# +# Improve performance by breaking down polynominal to the sum of products with +# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r +# +# 07/22/21 - this revison based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, s1, s0 +# to 9 vectors for multiplications. +# +# setup r^4, r^3, r^2, r vectors +# vs [r^1, r^3, r^2, r^4] +# vs0 = [r0,.....] +# vs1 = [r1,.....] +# vs2 = [r2,.....] +# vs3 = [r3,.....] +# vs4 = [r4,.....] +# vs5 = [r1*5,...] +# vs6 = [r2*5,...] +# vs7 = [r2*5,...] +# vs8 = [r4*5,...] +# +# Each word in a vector consists a member of a "r/s" in [a * r/s]. +# +# r0, r4*5, r3*5, r2*5, r1*5; +# r1, r0, r4*5, r3*5, r2*5; +# r2, r1, r0, r4*5, r3*5; +# r3, r2, r1, r0, r4*5; +# r4, r3, r2, r1, r0 ; +# +# +# gcry_poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m) +# k = 32 bytes key +# r3 = k (r, s) +# r4 = mlen +# r5 = m +# +.text + +# Block size 16 bytes +# key = (r, s) +# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF +# p = 2^130 - 5 +# a += m +# a = (r + a) % p +# a += s +# 16 bytes (a) +# +# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5; +# p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5; +# p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5; +# p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5; +# p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0 ; +# +# [r^2, r^3, r^1, r^4] +# [m3, m2, m4, m1] +# +# multiply odd and even words +.macro mul_odd + vmulouw 14, 4, 26 + vmulouw 10, 5, 3 + vmulouw 11, 6, 2 + vmulouw 12, 7, 1 + vmulouw 13, 8, 0 + vmulouw 15, 4, 27 + vaddudm 14, 14, 10 + vaddudm 14, 14, 11 + vmulouw 10, 5, 26 + vmulouw 11, 6, 3 + vaddudm 14, 14, 12 + vaddudm 14, 14, 13 # x0 + vaddudm 15, 15, 10 + vaddudm 15, 15, 11 + vmulouw 12, 7, 2 + vmulouw 13, 8, 1 + vaddudm 15, 15, 12 + vaddudm 15, 15, 13 # x1 + vmulouw 16, 4, 28 + vmulouw 10, 5, 27 + vmulouw 11, 6, 26 + vaddudm 16, 16, 10 + vaddudm 16, 16, 11 + vmulouw 12, 7, 3 + vmulouw 13, 8, 2 + vaddudm 16, 16, 12 + vaddudm 16, 16, 13 # x2 + vmulouw 17, 4, 29 + vmulouw 10, 5, 28 + vmulouw 11, 6, 27 + vaddudm 17, 17, 10 + vaddudm 17, 17, 11 + vmulouw 12, 7, 26 + vmulouw 13, 8, 3 + vaddudm 17, 17, 12 + vaddudm 17, 17, 13 # x3 + vmulouw 18, 4, 30 + vmulouw 10, 5, 29 + vmulouw 11, 6, 28 + vaddudm 18, 18, 10 + vaddudm 18, 18, 11 + vmulouw 12, 7, 27 + vmulouw 13, 8, 26 + vaddudm 18, 18, 12 + vaddudm 18, 18, 13 # x4 +.endm + +.macro mul_even + vmuleuw 9, 4, 26 + vmuleuw 10, 5, 3 + vmuleuw 11, 6, 2 + vmuleuw 12, 7, 1 + vmuleuw 13, 8, 0 + vaddudm 14, 14, 9 + vaddudm 14, 14, 10 + vaddudm 14, 14, 11 + vaddudm 14, 14, 12 + vaddudm 14, 14, 13 # x0 + + vmuleuw 9, 4, 27 + vmuleuw 10, 5, 26 + vmuleuw 11, 6, 3 + vmuleuw 12, 7, 2 + vmuleuw 13, 8, 1 + vaddudm 15, 15, 9 + vaddudm 15, 15, 10 + vaddudm 15, 15, 11 + vaddudm 15, 15, 12 + vaddudm 15, 15, 13 # x1 + + vmuleuw 9, 4, 28 + vmuleuw 10, 5, 27 + vmuleuw 11, 6, 26 + vmuleuw 12, 7, 3 + vmuleuw 13, 8, 2 + vaddudm 16, 16, 9 + vaddudm 16, 16, 10 + vaddudm 16, 16, 11 + vaddudm 16, 16, 12 + vaddudm 16, 16, 13 # x2 + + vmuleuw 9, 4, 29 + vmuleuw 10, 5, 28 + vmuleuw 11, 6, 27 + vmuleuw 12, 7, 26 + vmuleuw 13, 8, 3 + vaddudm 17, 17, 9 + vaddudm 17, 17, 10 + vaddudm 17, 17, 11 + vaddudm 17, 17, 12 + vaddudm 17, 17, 13 # x3 + + vmuleuw 9, 4, 30 + vmuleuw 10, 5, 29 + vmuleuw 11, 6, 28 + vmuleuw 12, 7, 27 + vmuleuw 13, 8, 26 + vaddudm 18, 18, 9 + vaddudm 18, 18, 10 + vaddudm 18, 18, 11 + vaddudm 18, 18, 12 + vaddudm 18, 18, 13 # x4 +.endm + +# setup r^4, r^3, r^2, r vectors +# [r, r^3, r^2, r^4] +# vs0 = [r0,...] +# vs1 = [r1,...] +# vs2 = [r2,...] +# vs3 = [r3,...] +# vs4 = [r4,...] +# vs5 = [r4*5,...] +# vs6 = [r3*5,...] +# vs7 = [r2*5,...] +# vs8 = [r1*5,...] +# +# r0, r4*5, r3*5, r2*5, r1*5; +# r1, r0, r4*5, r3*5, r2*5; +# r2, r1, r0, r4*5, r3*5; +# r3, r2, r1, r0, r4*5; +# r4, r3, r2, r1, r0 ; +# +.macro poly1305_setup_r + + # save r + xxlor 26, 58, 58 + xxlor 27, 59, 59 + xxlor 28, 60, 60 + xxlor 29, 61, 61 + xxlor 30, 62, 62 + + xxlxor 31, 31, 31 + +# [r, r^3, r^2, r^4] + # compute r^2 + vmr 4, 26 + vmr 5, 27 + vmr 6, 28 + vmr 7, 29 + vmr 8, 30 + bl do_mul # r^2 r^1 + xxpermdi 58, 58, 36, 0x3 # r0 + xxpermdi 59, 59, 37, 0x3 # r1 + xxpermdi 60, 60, 38, 0x3 # r2 + xxpermdi 61, 61, 39, 0x3 # r3 + xxpermdi 62, 62, 40, 0x3 # r4 + xxpermdi 36, 36, 36, 0x3 + xxpermdi 37, 37, 37, 0x3 + xxpermdi 38, 38, 38, 0x3 + xxpermdi 39, 39, 39, 0x3 + xxpermdi 40, 40, 40, 0x3 + vspltisb 13, 2 + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 + + bl do_mul # r^4 r^3 + vmrgow 26, 26, 4 + vmrgow 27, 27, 5 + vmrgow 28, 28, 6 + vmrgow 29, 29, 7 + vmrgow 30, 30, 8 + vspltisb 13, 2 + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 + + # r^2 r^4 + xxlor 0, 58, 58 + xxlor 1, 59, 59 + xxlor 2, 60, 60 + xxlor 3, 61, 61 + xxlor 4, 62, 62 + xxlor 5, 32, 32 + xxlor 6, 33, 33 + xxlor 7, 34, 34 + xxlor 8, 35, 35 + + vspltw 9, 26, 3 + vspltw 10, 26, 2 + vmrgow 26, 10, 9 + vspltw 9, 27, 3 + vspltw 10, 27, 2 + vmrgow 27, 10, 9 + vspltw 9, 28, 3 + vspltw 10, 28, 2 + vmrgow 28, 10, 9 + vspltw 9, 29, 3 + vspltw 10, 29, 2 + vmrgow 29, 10, 9 + vspltw 9, 30, 3 + vspltw 10, 30, 2 + vmrgow 30, 10, 9 + + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 +.endm + +do_mul: + mul_odd + + # do reduction ( h %= p ) + # carry reduction + vspltisb 9, 2 + vsrd 10, 14, 31 + vsrd 11, 17, 31 + vand 7, 17, 25 + vand 4, 14, 25 + vaddudm 18, 18, 11 + vsrd 12, 18, 31 + vaddudm 15, 15, 10 + + vsrd 11, 15, 31 + vand 8, 18, 25 + vand 5, 15, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 16, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vaddudm 8, 8, 11 + blr + +# +# init key +# +do_poly1305_init: + ld 10, rmask@got(2) + ld 11, 0(10) + ld 12, 8(10) + + li 14, 16 + li 15, 32 + ld 10, cnum@got(2) + lvx 25, 0, 10 # v25 - mask + lvx 31, 14, 10 # v31 = 1a + lvx 19, 15, 10 # v19 = 1 << 24 + lxv 24, 48(10) # vs24 + lxv 25, 64(10) # vs25 + + # initialize + # load key from r3 to vectors + ld 9, 16(3) + ld 10, 24(3) + ld 11, 0(3) + ld 12, 8(3) + + # break 26 bits + extrdi 14, 9, 26, 38 + extrdi 15, 9, 26, 12 + extrdi 16, 9, 12, 0 + mtvsrdd 58, 0, 14 + insrdi 16, 10, 14, 38 + mtvsrdd 59, 0, 15 + extrdi 17, 10, 26, 24 + mtvsrdd 60, 0, 16 + extrdi 18, 10, 24, 0 + mtvsrdd 61, 0, 17 + mtvsrdd 62, 0, 18 + + # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5 + li 9, 5 + mtvsrdd 36, 0, 9 + vmulouw 0, 27, 4 # v0 = rr0 + vmulouw 1, 28, 4 # v1 = rr1 + vmulouw 2, 29, 4 # v2 = rr2 + vmulouw 3, 30, 4 # v3 = rr3 + blr + +# +# gcry_poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m) +# k = 32 bytes key +# r3 = k (r, s) +# r4 = mlen +# r5 = m +# +.global gcry_poly1305_p10le_4blocks +.align 5 +gcry_poly1305_p10le_4blocks: +_gcry_poly1305_p10le_4blocks: + cmpdi 5, 128 + blt Out_no_poly1305 + + stdu 1,-1024(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + std 31,248(1) + li 14, 256 + stvx 20, 14, 1 + addi 14, 14, 16 + stvx 21, 14, 1 + addi 14, 14, 16 + stvx 22, 14, 1 + addi 14, 14, 16 + stvx 23, 14, 1 + addi 14, 14, 16 + stvx 24, 14, 1 + addi 14, 14, 16 + stvx 25, 14, 1 + addi 14, 14, 16 + stvx 26, 14, 1 + addi 14, 14, 16 + stvx 27, 14, 1 + addi 14, 14, 16 + stvx 28, 14, 1 + addi 14, 14, 16 + stvx 29, 14, 1 + addi 14, 14, 16 + stvx 30, 14, 1 + addi 14, 14, 16 + stvx 31, 14, 1 + + addi 14, 14, 16 + stxvx 14, 14, 1 + addi 14, 14, 16 + stxvx 15, 14, 1 + addi 14, 14, 16 + stxvx 16, 14, 1 + addi 14, 14, 16 + stxvx 17, 14, 1 + addi 14, 14, 16 + stxvx 18, 14, 1 + addi 14, 14, 16 + stxvx 19, 14, 1 + addi 14, 14, 16 + stxvx 20, 14, 1 + addi 14, 14, 16 + stxvx 21, 14, 1 + addi 14, 14, 16 + stxvx 22, 14, 1 + addi 14, 14, 16 + stxvx 23, 14, 1 + addi 14, 14, 16 + stxvx 24, 14, 1 + addi 14, 14, 16 + stxvx 25, 14, 1 + addi 14, 14, 16 + stxvx 26, 14, 1 + addi 14, 14, 16 + stxvx 27, 14, 1 + addi 14, 14, 16 + stxvx 28, 14, 1 + addi 14, 14, 16 + stxvx 29, 14, 1 + addi 14, 14, 16 + stxvx 30, 14, 1 + addi 14, 14, 16 + stxvx 31, 14, 1 + std 0, 1040(1) + + bl do_poly1305_init + + li 21, 0 # counter to message + + poly1305_setup_r + + # load previous state + # break/convert r6 to 26 bits + ld 9, 32(3) + ld 10, 40(3) + lwz 19, 48(3) + sldi 19, 19, 24 + mtvsrdd 41, 0, 19 + extrdi 14, 9, 26, 38 + extrdi 15, 9, 26, 12 + extrdi 16, 9, 12, 0 + mtvsrdd 36, 0, 14 + insrdi 16, 10, 14, 38 + mtvsrdd 37, 0, 15 + extrdi 17, 10, 26, 24 + mtvsrdd 38, 0, 16 + extrdi 18, 10, 24, 0 + mtvsrdd 39, 0, 17 + mtvsrdd 40, 0, 18 + vor 8, 8, 9 + + # input m1 m2 + add 20, 4, 21 + xxlor 49, 24, 24 + xxlor 50, 25, 25 + lxvw4x 43, 0, 20 + addi 17, 20, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + vand 9, 14, 25 # a0 + vsrd 10, 14, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + vand 10, 10, 25 # a1 + vspltisb 13, 12 + vand 16, 15, 25 + vsld 12, 16, 13 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vspltisb 13, 14 + vsrd 12, 15, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + vaddudm 20, 4, 9 + vaddudm 21, 5, 10 + vaddudm 22, 6, 11 + vaddudm 23, 7, 12 + vaddudm 24, 8, 13 + + # m3 m4 + addi 17, 17, 16 + lxvw4x 43, 0, 17 + addi 17, 17, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + vand 9, 14, 25 # a0 + vsrd 10, 14, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + vand 10, 10, 25 # a1 + vspltisb 13, 12 + vand 16, 15, 25 + vsld 12, 16, 13 + vspltisb 13, 14 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vsrd 12, 15, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] + vmrgow 4, 9, 20 + vmrgow 5, 10, 21 + vmrgow 6, 11, 22 + vmrgow 7, 12, 23 + vmrgow 8, 13, 24 + vaddudm 8, 8, 19 + + addi 5, 5, -64 + addi 21, 21, 64 + + li 9, 64 + divdu 31, 5, 9 + + mtctr 31 + +# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r +# Rewrite the polynominal sum of product as follows, +# h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2 +# h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h0 + m4) r^2 +# .... Repeat +# h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 --> +# h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r +# +loop_4blocks: + + # Multiply odd words and even words + mul_odd + mul_even + # carry reduction + vspltisb 9, 2 + vsrd 10, 14, 31 + vsrd 11, 17, 31 + vand 7, 17, 25 + vand 4, 14, 25 + vaddudm 18, 18, 11 + vsrd 12, 18, 31 + vaddudm 15, 15, 10 + + vsrd 11, 15, 31 + vand 8, 18, 25 + vand 5, 15, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 16, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vaddudm 8, 8, 11 + + # input m1 m2 m3 m4 + add 20, 4, 21 + xxlor 49, 24, 24 + xxlor 50, 25, 25 + lxvw4x 43, 0, 20 + addi 17, 20, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + addi 17, 17, 16 + lxvw4x 43, 0, 17 + addi 17, 17, 16 + lxvw4x 44, 0, 17 + vperm 17, 11, 12, 17 + vperm 18, 11, 12, 18 + + vand 20, 14, 25 # a0 + vand 9, 17, 25 # a0 + vsrd 21, 14, 31 # >> 26 + vsrd 22, 21, 31 # 12 bits left + vsrd 10, 17, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + + vand 21, 21, 25 # a1 + vand 10, 10, 25 # a1 + + vspltisb 13, 12 + vand 16, 15, 25 + vsld 23, 16, 13 + vor 22, 22, 23 + vand 22, 22, 25 # a2 + vand 16, 18, 25 + vsld 12, 16, 13 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vspltisb 13, 14 + vsrd 23, 15, 13 # >> 14 + vsrd 24, 23, 31 # >> 26, a4 + vand 23, 23, 25 # a3 + vsrd 12, 18, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + vaddudm 4, 4, 20 + vaddudm 5, 5, 21 + vaddudm 6, 6, 22 + vaddudm 7, 7, 23 + vaddudm 8, 8, 24 + + # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] + vmrgow 4, 9, 4 + vmrgow 5, 10, 5 + vmrgow 6, 11, 6 + vmrgow 7, 12, 7 + vmrgow 8, 13, 8 + vaddudm 8, 8, 19 + + addi 5, 5, -64 + addi 21, 21, 64 + + bdnz loop_4blocks + + xxlor 58, 0, 0 + xxlor 59, 1, 1 + xxlor 60, 2, 2 + xxlor 61, 3, 3 + xxlor 62, 4, 4 + xxlor 32, 5, 5 + xxlor 33, 6, 6 + xxlor 34, 7, 7 + xxlor 35, 8, 8 + + # Multiply odd words and even words + mul_odd + mul_even + + # Sum the products. + xxpermdi 41, 31, 46, 0 + xxpermdi 42, 31, 47, 0 + vaddudm 4, 14, 9 + xxpermdi 36, 31, 36, 3 + vaddudm 5, 15, 10 + xxpermdi 37, 31, 37, 3 + xxpermdi 43, 31, 48, 0 + vaddudm 6, 16, 11 + xxpermdi 38, 31, 38, 3 + xxpermdi 44, 31, 49, 0 + vaddudm 7, 17, 12 + xxpermdi 39, 31, 39, 3 + xxpermdi 45, 31, 50, 0 + vaddudm 8, 18, 13 + xxpermdi 40, 31, 40, 3 + + # carry reduction + vspltisb 9, 2 + vsrd 10, 4, 31 + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 8, 8, 11 + vsrd 12, 8, 31 + vaddudm 5, 5, 10 + + vsrd 11, 5, 31 + vand 8, 8, 25 + vand 5, 5, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 6, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vaddudm 8, 8, 11 + + b do_final_update + +do_final_update: + # v4, v5, v6, v7 and v8 are 26 bit vectors + vsld 5, 5, 31 + vor 20, 4, 5 + vspltisb 11, 12 + vsrd 12, 6, 11 + vsld 6, 6, 31 + vsld 6, 6, 31 + vor 20, 20, 6 + vspltisb 11, 14 + vsld 7, 7, 11 + vor 21, 7, 12 + mfvsrld 16, 40 # save last 2 bytes + vsld 8, 8, 11 + vsld 8, 8, 31 + vor 21, 21, 8 + mfvsrld 17, 52 + mfvsrld 19, 53 + srdi 16, 16, 24 + + std 17, 32(3) + std 19, 40(3) + stw 16, 48(3) + +Out_loop: + li 3, 0 + + li 14, 256 + lvx 20, 14, 1 + addi 14, 14, 16 + lvx 21, 14, 1 + addi 14, 14, 16 + lvx 22, 14, 1 + addi 14, 14, 16 + lvx 23, 14, 1 + addi 14, 14, 16 + lvx 24, 14, 1 + addi 14, 14, 16 + lvx 25, 14, 1 + addi 14, 14, 16 + lvx 26, 14, 1 + addi 14, 14, 16 + lvx 27, 14, 1 + addi 14, 14, 16 + lvx 28, 14, 1 + addi 14, 14, 16 + lvx 29, 14, 1 + addi 14, 14, 16 + lvx 30, 14, 1 + addi 14, 14, 16 + lvx 31, 14, 1 + + addi 14, 14, 16 + lxvx 14, 14, 1 + addi 14, 14, 16 + lxvx 15, 14, 1 + addi 14, 14, 16 + lxvx 16, 14, 1 + addi 14, 14, 16 + lxvx 17, 14, 1 + addi 14, 14, 16 + lxvx 18, 14, 1 + addi 14, 14, 16 + lxvx 19, 14, 1 + addi 14, 14, 16 + lxvx 20, 14, 1 + addi 14, 14, 16 + lxvx 21, 14, 1 + addi 14, 14, 16 + lxvx 22, 14, 1 + addi 14, 14, 16 + lxvx 23, 14, 1 + addi 14, 14, 16 + lxvx 24, 14, 1 + addi 14, 14, 16 + lxvx 25, 14, 1 + addi 14, 14, 16 + lxvx 26, 14, 1 + addi 14, 14, 16 + lxvx 27, 14, 1 + addi 14, 14, 16 + lxvx 28, 14, 1 + addi 14, 14, 16 + lxvx 29, 14, 1 + addi 14, 14, 16 + lxvx 30, 14, 1 + addi 14, 14, 16 + lxvx 31, 14, 1 + + ld 0, 1040(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + ld 31,248(1) + + mtlr 0 + addi 1, 1, 1024 + blr + +Out_no_poly1305: + li 3, 0 + blr + +.data +.align 5 +rmask: +.byte 0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f +cnum: +.long 0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000 +.long 0x1a, 0x00, 0x1a, 0x00 +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 +.long 0x00010203, 0x04050607, 0x10111213, 0x14151617 +.long 0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f +.long 0x05, 0x00, 0x00, 0x00 +.long 0x02020202, 0x02020202, 0x02020202, 0x02020202 +.long 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 5482fc6a..f5f3f85a 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -1,809 +1,838 @@ /* poly1305.c - Poly1305 internals and generic implementation * Copyright (C) 2014,2017,2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "poly1305-internal.h" #include "mpi-internal.h" #include "longlong.h" static const char *selftest (void); #undef HAVE_ASM_POLY1305_BLOCKS #undef USE_MPI_64BIT #undef USE_MPI_32BIT #if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64) # define USE_MPI_64BIT 1 #elif BYTES_PER_MPI_LIMB == 4 # define USE_MPI_32BIT 1 #else # error please implement for this limb size. #endif /* USE_S390X_ASM indicates whether to enable zSeries code. */ #undef USE_S390X_ASM #if BYTES_PER_MPI_LIMB == 8 # if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 # if defined(HAVE_GCC_INLINE_ASM_S390X) # define USE_S390X_ASM 1 # endif /* USE_S390X_ASM */ # endif #endif /* AMD64 Assembly implementations use SystemV ABI, ABI conversion and * additional stack to store XMM6-XMM15 needed on Win64. */ #undef ASM_FUNC_ABI #undef ASM_FUNC_WRAPPER_ATTR #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) # define ASM_FUNC_ABI __attribute__((sysv_abi)) # define ASM_FUNC_WRAPPER_ATTR __attribute__((noinline)) #else # define ASM_FUNC_ABI # define ASM_FUNC_WRAPPER_ATTR #endif #ifdef USE_S390X_ASM #define HAVE_ASM_POLY1305_BLOCKS 1 extern unsigned int _gcry_poly1305_s390x_blocks1(void *state, const byte *buf, size_t len, byte high_pad); static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad); } #endif /* USE_S390X_ASM */ #ifdef POLY1305_USE_AVX512 extern unsigned int _gcry_poly1305_amd64_avx512_blocks(const void *msg, const u64 msg_len, void *hash, const void *key) ASM_FUNC_ABI; ASM_FUNC_WRAPPER_ATTR static unsigned int poly1305_amd64_avx512_blocks(poly1305_context_t *ctx, const byte *buf, size_t len) { POLY1305_STATE *st = &ctx->state; return _gcry_poly1305_amd64_avx512_blocks(buf, len, st->h, st->r); } #endif /* POLY1305_USE_AVX512 */ +#ifdef POLY1305_USE_PPC_VEC + +extern unsigned int +gcry_poly1305_p10le_4blocks(unsigned char *key, const byte *m, size_t len); + +#endif /* POLY1305_USE_PPC_VEC */ + + static void poly1305_init (poly1305_context_t *ctx, const byte key[POLY1305_KEYLEN]) { POLY1305_STATE *st = &ctx->state; #ifdef POLY1305_USE_AVX512 ctx->use_avx512 = (_gcry_get_hw_features () & HWF_INTEL_AVX512) != 0; #endif +#ifdef POLY1305_USE_PPC_VEC + ctx->use_p10 = (_gcry_get_hw_features () & HWF_PPC_ARCH_3_10) != 0; +#endif + ctx->leftover = 0; st->h[0] = 0; st->h[1] = 0; st->h[2] = 0; st->h[3] = 0; st->h[4] = 0; st->r[0] = buf_get_le32(key + 0) & 0x0fffffff; st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc; st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc; st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc; st->k[0] = buf_get_le32(key + 16); st->k[1] = buf_get_le32(key + 20); st->k[2] = buf_get_le32(key + 24); st->k[3] = buf_get_le32(key + 28); } #ifdef USE_MPI_64BIT #if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4 /* A += B (armv8/aarch64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("adds %0, %3, %0\n" \ "adcs %1, %4, %1\n" \ "adc %2, %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "r" (B0), "r" (B1), "r" (B2) \ : "cc" ) #endif /* __aarch64__ */ #if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4 /* A += B (x86-64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("addq %3, %0\n" \ "adcq %4, %1\n" \ "adcq %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "g" (B0), "g" (B1), "g" (B2) \ : "cc" ) #endif /* __x86_64__ */ #if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4 /* A += B (ppc64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("addc %0, %3, %0\n" \ "adde %1, %4, %1\n" \ "adde %2, %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "r" (B0), "r" (B1), "r" (B2) \ : "cc" ) #endif /* __powerpc__ */ #ifndef ADD_1305_64 /* A += B (generic, mpi) */ # define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \ u64 carry; \ add_ssaaaa(carry, A0, 0, A0, 0, B0); \ add_ssaaaa(A2, A1, A2, A1, B2, B1); \ add_ssaaaa(A2, A1, A2, A1, 0, carry); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \ u64 x0_lo, x0_hi, x1_lo, x1_hi; \ u64 t0_lo, t0_hi, t1_lo, t1_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ \ umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ \ t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ t1_hi = H2 * R0; /* h2 * r0 */ \ add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ \ /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) #ifndef HAVE_ASM_POLY1305_BLOCKS static unsigned int poly1305_blocks_generic (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { POLY1305_STATE *st = &ctx->state; u64 r0, r1, r1_mult5; u64 h0, h1, h2; u64 m0, m1, m2; m2 = high_pad; h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; r0 = st->r[0] + ((u64)st->r[1] << 32); r1 = st->r[2] + ((u64)st->r[3] << 32); r1_mult5 = (r1 >> 2) + r1; m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; while (len >= POLY1305_BLOCKSIZE) { /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); st->h[0] = h0; st->h[1] = h0 >> 32; st->h[2] = h1; st->h[3] = h1 >> 32; st->h[4] = h2; return 6 * sizeof (void *) + 18 * sizeof (u64); } static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { #ifdef POLY1305_USE_AVX512 if ((high_pad & ctx->use_avx512) != 0) return poly1305_amd64_avx512_blocks(ctx, buf, len); #endif return poly1305_blocks_generic(ctx, buf, len, high_pad); } #endif /* !HAVE_ASM_POLY1305_BLOCKS */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u64 u, carry; u64 k0, k1; u64 h0, h1; u64 h2; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); ctx->leftover = POLY1305_BLOCKSIZE; } burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; k0 = st->k[0] + ((u64)st->k[1] << 32); k1 = st->k[2] + ((u64)st->k[3] << 32); /* check if h is more than 2^130-5, by adding 5. */ add_ssaaaa(carry, u, 0, h0, 0, 5); add_ssaaaa(carry, u, 0, carry, 0, h1); u = (carry + h2) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... (+5) */ u = (-u) & 5; add_ssaaaa(h1, h0, h1, h0, 0, u); /* add high part of key + h */ add_ssaaaa(h1, h0, h1, h0, k1, k0); buf_put_le64(mac + 0, h0); buf_put_le64(mac + 8, h1); /* burn_stack */ return 4 * sizeof (void *) + 7 * sizeof (u64) + burn; } #endif /* USE_MPI_64BIT */ #ifdef USE_MPI_32BIT #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS /* HI:LO += A * B (arm) */ #define UMUL_ADD_32(HI, LO, A, B) \ __asm__ ("umlal %1, %0, %4, %5" \ : "=r" (HI), "=r" (LO) \ : "0" (HI), "1" (LO), "r" (A), "r" (B) ) /* A += B (arm) */ #ifdef __GCC_ASM_FLAG_OUTPUTS__ # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 __carry; \ __asm__ ("adds %0, %0, %5\n" \ "adcs %1, %1, %6\n" \ "adcs %2, %2, %7\n" \ "adcs %3, %3, %8\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), \ "=@cccs" (__carry) \ : "r" (B0), "r" (B1), "r" (B2), "r" (B3) \ : ); \ (A4) += (B4) + __carry; \ } while (0) #else # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 __carry = (B0); \ __asm__ ("adds %0, %0, %2\n" \ "adcs %1, %1, %3\n" \ "rrx %2, %2\n" /* carry to 31th bit */ \ : "+r" (A0), "+r" (A1), "+r" (__carry) \ : "r" (B1), "r" (0) \ : "cc" ); \ __asm__ ("lsls %0, %0, #1\n" /* carry from 31th bit */ \ "adcs %1, %1, %4\n" \ "adcs %2, %2, %5\n" \ "adc %3, %3, %6\n" \ : "+r" (__carry), "+r" (A2), "+r" (A3), "+r" (A4) \ : "r" (B2), "r" (B3), "r" (B4) \ : "cc" ); \ } while (0) #endif #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ #if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 5 /* Note: ADD_1305_32 below does not compile on GCC-4.7 */ /* A += B (i386) */ #define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ __asm__ ("addl %5, %0\n" \ "adcl %6, %1\n" \ "adcl %7, %2\n" \ "adcl %8, %3\n" \ "adcl %9, %4\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \ : "cc" ) #endif /* __i386__ */ #ifndef UMUL_ADD_32 /* HI:LO += A * B (generic, mpi) */ # define UMUL_ADD_32(HI, LO, A, B) do { \ u32 t_lo, t_hi; \ umul_ppmm(t_hi, t_lo, A, B); \ add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \ } while (0) #endif #ifndef ADD_1305_32 /* A += B (generic, mpi) */ # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 carry0, carry1, carry2; \ add_ssaaaa(carry0, A0, 0, A0, 0, B0); \ add_ssaaaa(carry1, A1, 0, A1, 0, B1); \ add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \ add_ssaaaa(carry2, A2, 0, A2, 0, B2); \ add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \ add_ssaaaa(A4, A3, A4, A3, B4, B3); \ add_ssaaaa(A4, A3, A4, A3, 0, carry2); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \ R3_MULT5, R2_MULT5, R1_MULT5) do { \ u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \ u32 t0_lo, t0_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \ umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \ UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \ UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \ UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \ H1 = x0_hi; \ UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \ \ t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \ t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \ add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \ add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \ t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \ t0_hi = H4 * R0; /* h4 * r0 */ \ add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \ \ /* carry propagation */ \ H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \ H4 = H4 & 3; \ ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \ } while (0) #ifndef HAVE_ASM_POLY1305_BLOCKS static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { POLY1305_STATE *st = &ctx->state; u32 r1_mult5, r2_mult5, r3_mult5; u32 h0, h1, h2, h3, h4; u32 m0, m1, m2, m3, m4; m4 = high_pad; h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; r1_mult5 = (st->r[1] >> 2) + st->r[1]; r2_mult5 = (st->r[2] >> 2) + st->r[2]; r3_mult5 = (st->r[3] >> 2) + st->r[3]; while (len >= POLY1305_BLOCKSIZE) { m0 = buf_get_le32(buf + 0); m1 = buf_get_le32(buf + 4); m2 = buf_get_le32(buf + 8); m3 = buf_get_le32(buf + 12); /* a = h + m */ ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_32(h4, h3, h2, h1, h0, st->r[3], st->r[2], st->r[1], st->r[0], r3_mult5, r2_mult5, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } st->h[0] = h0; st->h[1] = h1; st->h[2] = h2; st->h[3] = h3; st->h[4] = h4; return 6 * sizeof (void *) + 28 * sizeof (u32); } #endif /* !HAVE_ASM_POLY1305_BLOCKS */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u32 carry, tmp0, tmp1, tmp2, u; u32 h4, h3, h2, h1, h0; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); ctx->leftover = POLY1305_BLOCKSIZE; } burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; /* check if h is more than 2^130-5, by adding 5. */ add_ssaaaa(carry, tmp0, 0, h0, 0, 5); add_ssaaaa(carry, tmp0, 0, carry, 0, h1); add_ssaaaa(carry, tmp0, 0, carry, 0, h2); add_ssaaaa(carry, tmp0, 0, carry, 0, h3); u = (carry + h4) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... (+5) */ u = (-u) & 5; add_ssaaaa(carry, h0, 0, h0, 0, u); add_ssaaaa(carry, h1, 0, h1, 0, carry); add_ssaaaa(carry, h2, 0, h2, 0, carry); add_ssaaaa(carry, h3, 0, h3, 0, carry); /* add high part of key + h */ add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]); add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]); add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0); add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]); add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1); add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]); h3 += tmp2; buf_put_le32(mac + 0, h0); buf_put_le32(mac + 4, h1); buf_put_le32(mac + 8, h2); buf_put_le32(mac + 12, h3); /* burn_stack */ return 4 * sizeof (void *) + 10 * sizeof (u32) + burn; } #endif /* USE_MPI_32BIT */ unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn = 0; + unsigned int nburn; /* handle leftover */ if (ctx->leftover) { size_t want = (POLY1305_BLOCKSIZE - ctx->leftover); if (want > bytes) want = bytes; buf_cpy (ctx->buffer + ctx->leftover, m, want); bytes -= want; m += want; ctx->leftover += want; if (ctx->leftover < POLY1305_BLOCKSIZE) return 0; - burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1); + nburn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1); + burn = nburn > burn ? nburn : burn; ctx->leftover = 0; } +#ifdef POLY1305_USE_PPC_VEC + /* PPC-P10/little-endian: bulk process multiples of eight blocks */ + if (ctx->use_p10 && bytes >= POLY1305_BLOCKSIZE * 8) + { + size_t nblks = bytes / (POLY1305_BLOCKSIZE * 8); + size_t len = nblks * (POLY1305_BLOCKSIZE * 8); + POLY1305_STATE *st = &ctx->state; + nburn = gcry_poly1305_p10le_4blocks ((unsigned char *) st, m, len); + burn = nburn > burn ? nburn : burn; + m += len; + bytes -= len; + } +#endif /* POLY1305_USE_PPC_VEC */ + /* process full blocks */ if (bytes >= POLY1305_BLOCKSIZE) { size_t nblks = bytes / POLY1305_BLOCKSIZE; - burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); + nburn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); + burn = nburn > burn ? nburn : burn; m += nblks * POLY1305_BLOCKSIZE; bytes -= nblks * POLY1305_BLOCKSIZE; } /* store leftover */ if (bytes) { buf_cpy (ctx->buffer + ctx->leftover, m, bytes); ctx->leftover += bytes; } return burn; } void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn; burn = _gcry_poly1305_update_burn (ctx, m, bytes); if (burn) _gcry_burn_stack (burn); } void _gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { unsigned int burn; burn = poly1305_final (ctx, mac); _gcry_burn_stack (burn); } gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, size_t keylen) { static int initialized; static const char *selftest_failed; if (!initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); } if (keylen != POLY1305_KEYLEN) return GPG_ERR_INV_KEYLEN; if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; poly1305_init (ctx, key); return 0; } static void poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes, const byte * key) { poly1305_context_t ctx; memset (&ctx, 0, sizeof (ctx)); _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, m, bytes); _gcry_poly1305_finish (&ctx, mac); wipememory (&ctx, sizeof (ctx)); } static const char * selftest (void) { /* example from nacl */ static const byte nacl_key[POLY1305_KEYLEN] = { 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, }; static const byte nacl_msg[131] = { 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, 0xe3, 0x55, 0xa5 }; static const byte nacl_mac[16] = { 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 }; /* generates a final value of (2^130 - 2) == 3 */ static const byte wrap_key[POLY1305_KEYLEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const byte wrap_msg[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte wrap_mac[16] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; /* mac of the macs of messages of length 0 to 256, where the key and messages * have all their values set to the length */ static const byte total_key[POLY1305_KEYLEN] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte total_mac[16] = { 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 }; poly1305_context_t ctx; poly1305_context_t total_ctx; byte all_key[POLY1305_KEYLEN]; byte all_msg[256]; byte mac[16]; size_t i, j; memset (&ctx, 0, sizeof (ctx)); memset (&total_ctx, 0, sizeof (total_ctx)); memset (mac, 0, sizeof (mac)); poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 1 failed."; /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so * make sure everything still works varying between them */ memset (mac, 0, sizeof (mac)); _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); _gcry_poly1305_update (&ctx, nacl_msg + 120, 4); _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); _gcry_poly1305_update (&ctx, nacl_msg + 128, 1); _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); _gcry_poly1305_finish (&ctx, mac); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 2 failed."; memset (mac, 0, sizeof (mac)); poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 3 failed."; _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); for (i = 0; i < 256; i++) { /* set key and message to 'i,i,i..' */ for (j = 0; j < sizeof (all_key); j++) all_key[j] = i; for (j = 0; j < i; j++) all_msg[j] = i; poly1305_auth (mac, all_msg, i, all_key); _gcry_poly1305_update (&total_ctx, mac, 16); } _gcry_poly1305_finish (&total_ctx, mac); if (memcmp (total_mac, mac, sizeof (total_mac)) != 0) return "Poly1305 test 4 failed."; return NULL; } diff --git a/configure.ac b/configure.ac index a7482cf3..52741db2 100644 --- a/configure.ac +++ b/configure.ac @@ -1,3400 +1,3412 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2021 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ([2.69]) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. m4_define([mym4_package],[libgcrypt]) m4_define([mym4_major], [1]) m4_define([mym4_minor], [11]) m4_define([mym4_micro], [0]) # Below is m4 magic to extract and compute the git revision number, # the decimalized short revision number, a beta version string and a # flag indicating a development version (mym4_isbeta). Note that the # m4 processing is done by autoconf and not during the configure run. m4_define([mym4_verslist], m4_split(m4_esyscmd([./autogen.sh --find-version] \ mym4_package mym4_major mym4_minor mym4_micro),[:])) m4_define([mym4_isbeta], m4_argn(2, mym4_verslist)) m4_define([mym4_version], m4_argn(4, mym4_verslist)) m4_define([mym4_revision], m4_argn(7, mym4_verslist)) m4_define([mym4_revision_dec], m4_argn(8, mym4_verslist)) m4_esyscmd([echo ]mym4_version[>VERSION]) AC_INIT([mym4_package],[mym4_version],[https://bugs.gnupg.org]) # LT Version numbers, remember to change them just *before* a release. # NOET NOTE - Already updated for a 1.11 series - NOTE NOTE # (Code changed: REVISION++) # (Interfaces added/removed/changed: CURRENT++, REVISION=0) # (Interfaces added: AGE++) # (Interfaces removed: AGE=0) # # (Interfaces removed: CURRENT++, AGE=0, REVISION=0) # (Interfaces added: CURRENT++, AGE++, REVISION=0) # (No interfaces changed: REVISION++) LIBGCRYPT_LT_CURRENT=25 LIBGCRYPT_LT_AGE=5 LIBGCRYPT_LT_REVISION=0 ################################################ AC_SUBST(LIBGCRYPT_LT_CURRENT) AC_SUBST(LIBGCRYPT_LT_AGE) AC_SUBST(LIBGCRYPT_LT_REVISION) # If the API is changed in an incompatible way: increment the next counter. # # 1.6: ABI and API change but the change is to most users irrelevant # and thus the API version number has not been incremented. LIBGCRYPT_CONFIG_API_VERSION=1 # If you change the required gpg-error version, please remove # unnecessary error code defines in src/gcrypt-int.h. NEED_GPG_ERROR_VERSION=1.27 AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* Add .note.gnu.property section for Intel CET in assembler sources when CET is enabled. */ #if defined(__ASSEMBLER__) && defined(__CET__) # include #endif /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. */ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) ###################### ## Basic checks. ### (we need some results later on (e.g. $GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_SEARCH_LIBS([strerror],[cposix]) AC_PROG_INSTALL AC_PROG_AWK AC_USE_SYSTEM_EXTENSIONS # Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE dnl Under Linux, make sure that the old dtags are used if LD_LIBRARY_PATH dnl is defined. The issue is that with the new dtags, LD_LIBRARY_PATH has dnl the precedence over the run path, so that if a compatible MPFR library dnl is installed in some directory from $LD_LIBRARY_PATH, then the tested dnl MPFR library will be this library instead of the MPFR library from the dnl build tree. Other OS with the same issue might be added later. dnl dnl References: dnl https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=859732 dnl http://lists.gnu.org/archive/html/libtool/2017-05/msg00000.html dnl dnl We need to check whether --disable-new-dtags is supported as alternate dnl linkers may be used (e.g., with tcc: CC=tcc LD=tcc). dnl case $host in *-*-linux*) if test -n "$LD_LIBRARY_PATH"; then saved_LDFLAGS="$LDFLAGS" LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags" LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE" AC_MSG_CHECKING(whether --disable-new-dtags is supported by the linker) AC_LINK_IFELSE([AC_LANG_SOURCE([[ int main (void) { return 0; } ]])], [AC_MSG_RESULT(yes (use it since LD_LIBRARY_PATH is set))], [AC_MSG_RESULT(no) LDADD_FOR_TESTS_KLUDGE="" ]) LDFLAGS="$saved_LDFLAGS" fi ;; esac AC_SUBST([LDADD_FOR_TESTS_KLUDGE]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_major \ mym4_minor mym4_micro) AC_SUBST(VERSION_NUMBER) # We need to compile and run a program on the build machine. AX_CC_FOR_BUILD LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" available_ciphers="$available_ciphers sm4" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. available_random_modules="getentropy linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 1, Expose all libc features (__DARWIN_C_FULL).) AC_DEFINE(USE_POSIX_SPAWN_FOR_TESTS, 1, [defined if we use posix_spawn in test program]) AC_CHECK_HEADERS(spawn.h) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AS_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. ***]]) fi # If not specified otherwise, all available algorithms will be # included. default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AS_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. AC_ARG_ENABLE(pubkey-ciphers, AS_HELP_STRING([--enable-pubkey-ciphers=ciphers], [select the public-key ciphers to include]), [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_pubkey_ciphers=""]) if test "x$enabled_pubkey_ciphers" = "x" \ -o "$enabled_pubkey_ciphers" = "yes" \ -o "$enabled_pubkey_ciphers" = "no"; then enabled_pubkey_ciphers=$default_pubkey_ciphers fi AC_MSG_CHECKING([which public-key ciphers to include]) for cipher in $enabled_pubkey_ciphers; do LIST_MEMBER($cipher, $available_pubkey_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported public-key cipher specified]) fi done AC_MSG_RESULT([$enabled_pubkey_ciphers]) # Implementation of the --enable-digests switch. AC_ARG_ENABLE(digests, AS_HELP_STRING([--enable-digests=digests], [select the message digests to include]), [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_digests=""]) if test "x$enabled_digests" = "x" \ -o "$enabled_digests" = "yes" \ -o "$enabled_digests" = "no"; then enabled_digests=$default_digests fi AC_MSG_CHECKING([which message digests to include]) for digest in $enabled_digests; do LIST_MEMBER($digest, $available_digests) if test "$found" = "0"; then AC_MSG_ERROR([unsupported message digest specified]) fi done AC_MSG_RESULT([$enabled_digests]) # Implementation of the --enable-kdfs switch. AC_ARG_ENABLE(kdfs, AS_HELP_STRING([--enable-kfds=kdfs], [select the KDFs to include]), [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_kdfs=""]) if test "x$enabled_kdfs" = "x" \ -o "$enabled_kdfs" = "yes" \ -o "$enabled_kdfs" = "no"; then enabled_kdfs=$default_kdfs fi AC_MSG_CHECKING([which key derivation functions to include]) for kdf in $enabled_kdfs; do LIST_MEMBER($kdf, $available_kdfs) if test "$found" = "0"; then AC_MSG_ERROR([unsupported key derivation function specified]) fi done AC_MSG_RESULT([$enabled_kdfs]) # Implementation of the --enable-random switch. AC_ARG_ENABLE(random, AS_HELP_STRING([--enable-random=name], [select which random number generator to use]), [random=`echo $enableval | tr '[A-Z]' '[a-z]'`], []) if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then random=default fi AC_MSG_CHECKING([which random module to use]) if test "$random" != "default" -a "$random" != "auto"; then LIST_MEMBER($random, $available_random_modules) if test "$found" = "0"; then AC_MSG_ERROR([unsupported random module specified]) fi fi AC_MSG_RESULT($random) # Implementation of the --disable-dev-random switch. AC_MSG_CHECKING([whether use of /dev/random is requested]) AC_ARG_ENABLE(dev-random, [ --disable-dev-random disable the use of dev random], try_dev_random=$enableval, try_dev_random=yes) AC_MSG_RESULT($try_dev_random) # Implementation of the --with-egd-socket switch. AC_ARG_WITH(egd-socket, [ --with-egd-socket=NAME Use NAME for the EGD socket)], egd_socket_name="$withval", egd_socket_name="" ) AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name", [Define if you don't want the default EGD socket name. For details see cipher/rndegd.c]) # Implementation of --disable-asm. AC_MSG_CHECKING([whether MPI and cipher assembler modules are requested]) AC_ARG_ENABLE([asm], AS_HELP_STRING([--disable-asm], [Disable MPI and cipher assembler modules]), [try_asm_modules=$enableval], [try_asm_modules=yes]) AC_MSG_RESULT($try_asm_modules) if test "$try_asm_modules" != yes ; then AC_DEFINE(ASM_DISABLED,1,[Defined if --disable-asm was used to configure]) fi # Implementation of the --enable-large-data-tests switch. AC_MSG_CHECKING([whether to run large data tests]) AC_ARG_ENABLE(large-data-tests, AS_HELP_STRING([--enable-large-data-tests], [Enable the real long ruinning large data tests]), large_data_tests=$enableval,large_data_tests=no) AC_MSG_RESULT($large_data_tests) AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests) # Implementation of --enable-force-soft-hwfeatures AC_MSG_CHECKING([whether 'soft' HW feature bits are forced on]) AC_ARG_ENABLE([force-soft-hwfeatures], AS_HELP_STRING([--enable-force-soft-hwfeatures], [Enable forcing 'soft' HW feature bits on]), [force_soft_hwfeatures=$enableval], [force_soft_hwfeatures=no]) AC_MSG_RESULT($force_soft_hwfeatures) # Implementation of the --with-capabilities switch. # Check whether we want to use Linux capabilities AC_MSG_CHECKING([whether use of capabilities is requested]) AC_ARG_WITH(capabilities, AS_HELP_STRING([--with-capabilities], [Use linux capabilities [default=no]]), [use_capabilities="$withval"],[use_capabilities=no]) AC_MSG_RESULT($use_capabilities) # Implementation of the --enable-hmac-binary-check. AC_MSG_CHECKING([whether a HMAC binary check is requested]) AC_ARG_ENABLE(hmac-binary-check, AS_HELP_STRING([--enable-hmac-binary-check], [Enable library integrity check]), [use_hmac_binary_check="$enableval"], [use_hmac_binary_check=no]) AC_MSG_RESULT($use_hmac_binary_check) if test "$use_hmac_binary_check" = no ; then DEF_HMAC_BINARY_CHECK='' else AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1, [Define to support an HMAC based integrity check]) AC_CHECK_TOOL(OBJCOPY, [objcopy]) AC_CHECK_TOOL(READELF, [readelf]) if test "$use_hmac_binary_check" != yes ; then DEF_HMAC_BINARY_CHECK=-DKEY_FOR_BINARY_CHECK="'\"$use_hmac_binary_check\"'" fi fi AM_CONDITIONAL(USE_HMAC_BINARY_CHECK, test "x$use_hmac_binary_check" != xno) AC_SUBST(DEF_HMAC_BINARY_CHECK) # Implementation of the --with-fips-module-version. AC_ARG_WITH(fips-module-version, AS_HELP_STRING([--with-fips-module-version=VERSION], [Specify the FIPS module version for the build]), fips_module_version="$withval", fips_module_version="" ) AC_DEFINE_UNQUOTED(FIPS_MODULE_VERSION, "$fips_module_version", [Define FIPS module version for certification]) # Implementation of the --disable-jent-support switch. AC_MSG_CHECKING([whether jitter entropy support is requested]) AC_ARG_ENABLE(jent-support, AS_HELP_STRING([--disable-jent-support], [Disable support for the Jitter entropy collector]), jentsupport=$enableval,jentsupport=yes) AC_MSG_RESULT($jentsupport) # Implementation of the --disable-padlock-support switch. AC_MSG_CHECKING([whether padlock support is requested]) AC_ARG_ENABLE(padlock-support, AS_HELP_STRING([--disable-padlock-support], [Disable support for the PadLock Engine of VIA processors]), padlocksupport=$enableval,padlocksupport=yes) AC_MSG_RESULT($padlocksupport) # Implementation of the --disable-aesni-support switch. AC_MSG_CHECKING([whether AESNI support is requested]) AC_ARG_ENABLE(aesni-support, AS_HELP_STRING([--disable-aesni-support], [Disable support for the Intel AES-NI instructions]), aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) # Implementation of the --disable-shaext-support switch. AC_MSG_CHECKING([whether SHAEXT support is requested]) AC_ARG_ENABLE(shaext-support, AS_HELP_STRING([--disable-shaext-support], [Disable support for the Intel SHAEXT instructions]), shaextsupport=$enableval,shaextsupport=yes) AC_MSG_RESULT($shaextsupport) # Implementation of the --disable-pclmul-support switch. AC_MSG_CHECKING([whether PCLMUL support is requested]) AC_ARG_ENABLE(pclmul-support, AS_HELP_STRING([--disable-pclmul-support], [Disable support for the Intel PCLMUL instructions]), pclmulsupport=$enableval,pclmulsupport=yes) AC_MSG_RESULT($pclmulsupport) # Implementation of the --disable-sse41-support switch. AC_MSG_CHECKING([whether SSE4.1 support is requested]) AC_ARG_ENABLE(sse41-support, AS_HELP_STRING([--disable-sse41-support], [Disable support for the Intel SSE4.1 instructions]), sse41support=$enableval,sse41support=yes) AC_MSG_RESULT($sse41support) # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, AS_HELP_STRING([--disable-drng-support], [Disable support for the Intel DRNG (RDRAND instruction)]), drngsupport=$enableval,drngsupport=yes) AC_MSG_RESULT($drngsupport) # Implementation of the --disable-avx-support switch. AC_MSG_CHECKING([whether AVX support is requested]) AC_ARG_ENABLE(avx-support, AS_HELP_STRING([--disable-avx-support], [Disable support for the Intel AVX instructions]), avxsupport=$enableval,avxsupport=yes) AC_MSG_RESULT($avxsupport) # Implementation of the --disable-avx2-support switch. AC_MSG_CHECKING([whether AVX2 support is requested]) AC_ARG_ENABLE(avx2-support, AS_HELP_STRING([--disable-avx2-support], [Disable support for the Intel AVX2 instructions]), avx2support=$enableval,avx2support=yes) AC_MSG_RESULT($avx2support) # Implementation of the --disable-avx512-support switch. AC_MSG_CHECKING([whether AVX512 support is requested]) AC_ARG_ENABLE(avx512-support, AS_HELP_STRING([--disable-avx512-support], [Disable support for the Intel AVX512 instructions]), avx512support=$enableval,avx512support=yes) AC_MSG_RESULT($avx512support) # Implementation of the --disable-gfni-support switch. AC_MSG_CHECKING([whether GFNI support is requested]) AC_ARG_ENABLE(gfni-support, AS_HELP_STRING([--disable-gfni-support], [Disable support for the Intel GFNI instructions]), gfnisupport=$enableval,gfnisupport=yes) AC_MSG_RESULT($gfnisupport) # Implementation of the --disable-neon-support switch. AC_MSG_CHECKING([whether NEON support is requested]) AC_ARG_ENABLE(neon-support, AS_HELP_STRING([--disable-neon-support], [Disable support for the ARM NEON instructions]), neonsupport=$enableval,neonsupport=yes) AC_MSG_RESULT($neonsupport) # Implementation of the --disable-arm-crypto-support switch. AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested]) AC_ARG_ENABLE(arm-crypto-support, AS_HELP_STRING([--disable-arm-crypto-support], [Disable support for the ARMv8 Crypto Extension instructions]), armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) # Implementation of the --disable-ppc-crypto-support switch. AC_MSG_CHECKING([whether PPC crypto support is requested]) AC_ARG_ENABLE(ppc-crypto-support, AS_HELP_STRING([--disable-ppc-crypto-support], [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), ppccryptosupport=$enableval,ppccryptosupport=yes) AC_MSG_RESULT($ppccryptosupport) # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], AS_HELP_STRING([--disable-O-flag-munging], [Disable modification of the cc -O flag]), [enable_o_flag_munging=$enableval], [enable_o_flag_munging=yes]) AC_MSG_RESULT($enable_o_flag_munging) AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes") # Implementation of the --disable-instrumentation-munging switch. AC_MSG_CHECKING([whether a instrumentation (-fprofile, -fsanitize) munging is requested]) AC_ARG_ENABLE([instrumentation-munging], AS_HELP_STRING([--disable-instrumentation-munging], [Disable modification of the cc instrumentation options]), [enable_instrumentation_munging=$enableval], [enable_instrumentation_munging=yes]) AC_MSG_RESULT($enable_instrumentation_munging) AM_CONDITIONAL(ENABLE_INSTRUMENTATION_MUNGING, test "$enable_instrumentation_munging" = "yes") # Implementation of the --disable-amd64-as-feature-detection switch. AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection]) AC_ARG_ENABLE(amd64-as-feature-detection, AS_HELP_STRING([--disable-amd64-as-feature-detection], [Disable the auto-detection of AMD64 as(1) features]), amd64_as_feature_detection=$enableval, amd64_as_feature_detection=yes) AC_MSG_RESULT($amd64_as_feature_detection) AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME", [A human readable text with the name of the OS]) # For some systems we know that we have ld_version scripts. # Use it then as default. have_ld_version_script=no case "${host}" in *-*-linux*) have_ld_version_script=yes ;; *-*-gnu*) have_ld_version_script=yes ;; esac AC_ARG_ENABLE([ld-version-script], AS_HELP_STRING([--enable-ld-version-script], [enable/disable use of linker version script. (default is system dependent)]), [have_ld_version_script=$enableval], [ : ] ) AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes") AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM", [defined to the name of the strong random device]) AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM", [defined to the name of the weaker random device]) ############################### #### Checks for libraries. #### ############################### # # gpg-error is required. # AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION") if test "x$GPG_ERROR_LIBS" = "x"; then AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .]) fi AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT, [The default error source for libgcrypt.]) AM_CONDITIONAL(USE_GPGRT_CONFIG, [test -n "$GPGRT_CONFIG" \ -a "$ac_cv_path_GPG_ERROR_CONFIG" = no]) # # Check whether pthreads is available # if test "$have_w32_system" != yes; then AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes) if test "$have_pthread" = yes; then AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.]) fi fi # Solaris needs -lsocket and -lnsl. Unisys system includes # gethostbyname in libsocket but needs libnsl for socket. AC_SEARCH_LIBS(setsockopt, [socket], , [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])]) AC_SEARCH_LIBS(setsockopt, [nsl]) ################################## #### Checks for header files. #### ################################## AC_CHECK_HEADERS(unistd.h sys/auxv.h sys/random.h) ########################################## #### Checks for typedefs, structures, #### #### and compiler characteristics. #### ########################################## AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_PID_T AC_CHECK_TYPES([byte, ushort, u16, u32, u64]) # # Check for __builtin_bswap32 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap32, [gcry_cv_have_builtin_bswap32], [gcry_cv_have_builtin_bswap32=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int x = 0; int y = __builtin_bswap32(x); return y;])], [gcry_cv_have_builtin_bswap32=yes])]) if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP32,1, [Defined if compiler has '__builtin_bswap32' intrinsic]) fi # # Check for __builtin_bswap64 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap64, [gcry_cv_have_builtin_bswap64], [gcry_cv_have_builtin_bswap64=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [long long x = 0; long long y = __builtin_bswap64(x); return y;])], [gcry_cv_have_builtin_bswap64=yes])]) if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP64,1, [Defined if compiler has '__builtin_bswap64' intrinsic]) fi # # Check for __builtin_ctz intrinsic. # AC_CACHE_CHECK(for __builtin_ctz, [gcry_cv_have_builtin_ctz], [gcry_cv_have_builtin_ctz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_ctz(x); return y;])], [gcry_cv_have_builtin_ctz=yes])]) if test "$gcry_cv_have_builtin_ctz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZ, 1, [Defined if compiler has '__builtin_ctz' intrinsic]) fi # # Check for __builtin_ctzl intrinsic. # AC_CACHE_CHECK(for __builtin_ctzl, [gcry_cv_have_builtin_ctzl], [gcry_cv_have_builtin_ctzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_ctzl(x); return y;])], [gcry_cv_have_builtin_ctzl=yes])]) if test "$gcry_cv_have_builtin_ctzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZL, 1, [Defined if compiler has '__builtin_ctzl' intrinsic]) fi # # Check for __builtin_clz intrinsic. # AC_CACHE_CHECK(for __builtin_clz, [gcry_cv_have_builtin_clz], [gcry_cv_have_builtin_clz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_clz(x); return y;])], [gcry_cv_have_builtin_clz=yes])]) if test "$gcry_cv_have_builtin_clz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZ, 1, [Defined if compiler has '__builtin_clz' intrinsic]) fi # # Check for __builtin_clzl intrinsic. # AC_CACHE_CHECK(for __builtin_clzl, [gcry_cv_have_builtin_clzl], [gcry_cv_have_builtin_clzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_clzl(x); return y;])], [gcry_cv_have_builtin_clzl=yes])]) if test "$gcry_cv_have_builtin_clzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZL, 1, [Defined if compiler has '__builtin_clzl' intrinsic]) fi # # Check for __sync_synchronize intrinsic. # AC_CACHE_CHECK(for __sync_synchronize, [gcry_cv_have_sync_synchronize], [gcry_cv_have_sync_synchronize=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [__sync_synchronize(); return 0;])], [gcry_cv_have_sync_synchronize=yes])]) if test "$gcry_cv_have_sync_synchronize" = "yes" ; then AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, [Defined if compiler has '__sync_synchronize' intrinsic]) fi # # Check for VLA support (variable length arrays). # AC_CACHE_CHECK(whether the variable length arrays are supported, [gcry_cv_have_vla], [gcry_cv_have_vla=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void f1(char *, int); char foo(int i) { char b[(i < 0 ? 0 : i) + 1]; f1(b, sizeof b); return b[0];}]])], [gcry_cv_have_vla=yes])]) if test "$gcry_cv_have_vla" = "yes" ; then AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported]) fi # # Check for ELF visibility support. # AC_CACHE_CHECK(whether the visibility attribute is supported, gcry_cv_visibility_attribute, [gcry_cv_visibility_attribute=no AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo __attribute__ ((visibility ("hidden"))) = 1; int bar __attribute__ ((visibility ("protected"))) = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then gcry_cv_visibility_attribute=yes fi fi fi ]) if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken visibility attribute, gcry_cv_broken_visibility_attribute, [gcry_cv_broken_visibility_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo (int x); int bar (int x) __asm__ ("foo") __attribute__ ((visibility ("hidden"))); int bar (int x) { return x; } ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1; then gcry_cv_broken_visibility_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken alias attribute, gcry_cv_broken_alias_attribute, [gcry_cv_broken_alias_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[extern int foo (int x) __asm ("xyzzy"); int bar (int x) { return x; } extern __typeof (bar) foo __attribute ((weak, alias ("bar"))); extern int dfoo; extern __typeof (dfoo) dfoo __asm ("abccb"); int dfoo = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \ grep 'abccb' conftest.s >/dev/null 2>&1; then gcry_cv_broken_alias_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(if gcc supports -fvisibility=hidden, gcry_cv_gcc_has_f_visibility, [gcry_cv_gcc_has_f_visibility=no _gcc_cflags_save=$CFLAGS CFLAGS="-fvisibility=hidden" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])], gcry_cv_gcc_has_f_visibility=yes) CFLAGS=$_gcc_cflags_save; ]) fi if test "$gcry_cv_visibility_attribute" = "yes" \ && test "$gcry_cv_broken_visibility_attribute" != "yes" \ && test "$gcry_cv_broken_alias_attribute" != "yes" \ && test "$gcry_cv_gcc_has_f_visibility" = "yes" then AC_DEFINE(GCRY_USE_VISIBILITY, 1, [Define to use the GNU C visibility attribute.]) CFLAGS="$CFLAGS -fvisibility=hidden" fi # Following attribute tests depend on warnings to cause compile to fail, # so set -Werror temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether the compiler supports the GCC style aligned attribute # AC_CACHE_CHECK([whether the GCC style aligned attribute is supported], [gcry_cv_gcc_attribute_aligned], [gcry_cv_gcc_attribute_aligned=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct { int a; } foo __attribute__ ((aligned (16)));]])], [gcry_cv_gcc_attribute_aligned=yes])]) if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1, [Defined if a GCC style "__attribute__ ((aligned (n))" is supported]) fi # # Check whether the compiler supports the GCC style packed attribute # AC_CACHE_CHECK([whether the GCC style packed attribute is supported], [gcry_cv_gcc_attribute_packed], [gcry_cv_gcc_attribute_packed=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct foolong_s { long b; } __attribute__ ((packed)); struct foo_s { char a; struct foolong_s b; } __attribute__ ((packed)); enum bar { FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))), };]])], [gcry_cv_gcc_attribute_packed=yes])]) if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, [Defined if a GCC style "__attribute__ ((packed))" is supported]) fi # # Check whether the compiler supports the GCC style may_alias attribute # AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported], [gcry_cv_gcc_attribute_may_alias], [gcry_cv_gcc_attribute_may_alias=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[typedef struct foo_s { int a; } __attribute__ ((may_alias)) foo_t;]])], [gcry_cv_gcc_attribute_may_alias=yes])]) if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1, [Defined if a GCC style "__attribute__ ((may_alias))" is supported]) fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # AC_CACHE_CHECK([whether 'asm' assembler keyword is supported], [gcry_cv_have_asm], [gcry_cv_have_asm=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm("":::"memory"); }]])], [gcry_cv_have_asm=yes])]) AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported], [gcry_cv_have___asm__], [gcry_cv_have___asm__=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("":::"memory"); }]])], [gcry_cv_have___asm__=yes])]) if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_DEFINE(asm,__asm__, [Define to supported assembler block keyword, if plain 'asm' was not supported]) fi fi # # Check whether the compiler supports inline assembly memory barrier. # if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { __asm__ volatile("":::"memory"); __asm__ volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { asm volatile("":::"memory"); asm volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1, [Define if inline asm memory barrier is supported]) fi # # Check whether GCC assembler supports features needed for our ARM # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], [gcry_cv_gcc_arm_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_arm_platform_as_ok="n/a" else gcry_cv_gcc_arm_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if assembler supports UAL syntax. */ ".syntax unified\n\t" ".arm\n\t" /* our assembly code is in ARM mode */ ".text\n\t" /* Following causes error if assembler ignored '.syntax unified'. */ "asmfunc:\n\t" "add %r0, %r0, %r4, ror #12;\n\t" /* Test if '.type' and '.size' are supported. */ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,%function;\n\t" );]], [ asmfunc(); ] )], [gcry_cv_gcc_arm_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARM assembly implementations]) fi # # Check whether GCC assembler supports features needed for our ARMv8/Aarch64 # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations], [gcry_cv_gcc_aarch64_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_aarch64_platform_as_ok="n/a" else gcry_cv_gcc_aarch64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "asmfunc:\n\t" "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" );]], [ asmfunc(); ] )], [gcry_cv_gcc_aarch64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations]) fi # # Check whether GCC assembler supports for CFI directives. # AC_CACHE_CHECK([whether GCC assembler supports for CFI directives], [gcry_cv_gcc_asm_cfi_directives], [gcry_cv_gcc_asm_cfi_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "ac_test:\n\t" ".cfi_startproc\n\t" ".cfi_remember_state\n\t" ".cfi_adjust_cfa_offset 8\n\t" ".cfi_rel_offset 0, 8\n\t" ".cfi_def_cfa_register 1\n\t" ".cfi_register 2, 3\n\t" ".cfi_restore 2\n\t" ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t" ".cfi_restore_state\n\t" ".long 0\n\t" ".cfi_endproc\n\t" );]])], [gcry_cv_gcc_asm_cfi_directives=yes])]) if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_CFI_DIRECTIVES,1, [Defined if underlying assembler supports for CFI directives]) fi # # Check whether GCC assembler supports for ELF directives. # AC_CACHE_CHECK([whether GCC assembler supports for ELF directives], [gcry_cv_gcc_asm_elf_directives], [gcry_cv_gcc_asm_elf_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if ELF directives '.type' and '.size' are supported. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,STT_FUNC;\n\t" );]])], [gcry_cv_gcc_asm_elf_directives=yes])]) if test "$gcry_cv_gcc_asm_elf_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_ELF_DIRECTIVES,1, [Defined if underlying assembler supports for ELF directives]) fi # # Check whether underscores in symbols are required. This needs to be # done before setting up the assembler stuff. # GNUPG_SYS_SYMBOL_UNDERSCORE() ################################# #### #### #### Setup assembler stuff. #### #### Define mpi_cpu_arch. #### #### #### ################################# AC_ARG_ENABLE(mpi-path, AS_HELP_STRING([--enable-mpi-path=EXTRA_PATH], [prepend EXTRA_PATH to list of CPU specific optimizations]), mpi_extra_path="$enableval",mpi_extra_path="") AC_MSG_CHECKING(architecture and mpi assembler functions) if test -f $srcdir/mpi/config.links ; then . $srcdir/mpi/config.links AC_CONFIG_LINKS("$mpi_ln_list") ac_cv_mpi_sflags="$mpi_sflags" AC_MSG_RESULT($mpi_cpu_arch) else AC_MSG_RESULT(failed) AC_MSG_ERROR([mpi/config.links missing!]) fi MPI_SFLAGS="$ac_cv_mpi_sflags" AC_SUBST(MPI_SFLAGS) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) # Reset non applicable feature flags. if test "$mpi_cpu_arch" != "x86" ; then aesnisupport="n/a" shaextsupport="n/a" pclmulsupport="n/a" sse41support="n/a" avxsupport="n/a" avx2support="n/a" avx512support="n/a" gfnisupport="n/a" padlocksupport="n/a" drngsupport="n/a" fi if test "$mpi_cpu_arch" != "arm" ; then if test "$mpi_cpu_arch" != "aarch64" ; then neonsupport="n/a" armcryptosupport="n/a" fi fi if test "$mpi_cpu_arch" != "ppc"; then ppccryptosupport="n/a" fi ############################################# #### #### #### Platform specific compiler checks. #### #### #### ############################################# # Following tests depend on warnings to cause compile to fail, so set -Werror # temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether compiler supports 'ms_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute], [gcry_cv_gcc_attribute_ms_abi], [gcry_cv_gcc_attribute_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((ms_abi)) proto(int);]])], [gcry_cv_gcc_attribute_ms_abi=yes])]) if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1, [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute]) fi # # Check whether compiler supports 'sysv_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute], [gcry_cv_gcc_attribute_sysv_abi], [gcry_cv_gcc_attribute_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((sysv_abi)) proto(int);]])], [gcry_cv_gcc_attribute_sysv_abi=yes])]) if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1, [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute]) fi # # Check whether default calling convention is 'ms_abi'. # if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], [gcry_cv_gcc_default_abi_is_ms_abi], [gcry_cv_gcc_default_abi_is_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((ms_abi))(*msabi_func)(void); /* warning on SysV abi targets, passes on Windows based targets */ msabi_func = def_func; return msabi_func; }]])], [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, [Defined if default calling convention is 'ms_abi']) fi fi # # Check whether default calling convention is 'sysv_abi'. # if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], [gcry_cv_gcc_default_abi_is_sysv_abi], [gcry_cv_gcc_default_abi_is_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((sysv_abi))(*sysvabi_func)(void); /* warning on MS ABI targets, passes on SysV ABI targets */ sysvabi_func = def_func; return sysvabi_func; }]])], [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, [Defined if default calling convention is 'sysv_abi']) fi fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions], [gcry_cv_gcc_inline_asm_ssse3], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_ssse3="n/a" else gcry_cv_gcc_inline_asm_ssse3=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; void a(void) { __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_ssse3=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1, [Defined if inline assembler supports SSSE3 instructions]) fi # # Check whether GCC inline assembler supports PCLMUL instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], [gcry_cv_gcc_inline_asm_pclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_pclmul="n/a" else gcry_cv_gcc_inline_asm_pclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_pclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, [Defined if inline assembler supports PCLMUL instructions]) fi # # Check whether GCC inline assembler supports SHA Extensions instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions], [gcry_cv_gcc_inline_asm_shaext], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_shaext="n/a" else gcry_cv_gcc_inline_asm_shaext=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_shaext=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1, [Defined if inline assembler supports SHA Extensions instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports AVX512 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX512 instructions], [gcry_cv_gcc_inline_asm_avx512], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx512="n/a" else gcry_cv_gcc_inline_asm_avx512=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpopcntq %%zmm7, %%zmm1%{%%k1%}%{z%};\n\t":::"cc"); __asm__("vpexpandb %%zmm3, %%zmm1;\n\t":::"cc"); __asm__("vpxorq %%xmm7, %%xmm7, %%xmm7;\n\t":::"cc"); __asm__("vpxorq %%ymm7, %%ymm7, %%ymm7;\n\t":::"cc"); __asm__("vpxorq (%%eax)%{1to8%}, %%zmm7, %%zmm7;\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx512=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx512" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX512,1, [Defined if inline assembler supports AVX512 instructions]) fi # # Check whether GCC inline assembler supports VAES and VPCLMUL instructions # AC_CACHE_CHECK([whether GCC inline assembler supports VAES and VPCLMUL instructions], [gcry_cv_gcc_inline_asm_vaes_vpclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_vaes_vpclmul="n/a" else gcry_cv_gcc_inline_asm_vaes_vpclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("vaesenclast %%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vaesenclast %%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ __asm__("vpclmulqdq \$0,%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vpclmulqdq \$0,%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_vaes_vpclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_vaes_vpclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL,1, [Defined if inline assembler supports VAES and VPCLMUL instructions]) fi # # Check whether GCC inline assembler supports GFNI instructions # AC_CACHE_CHECK([whether GCC inline assembler supports GFNI instructions], [gcry_cv_gcc_inline_asm_gfni], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_gfni="n/a" else gcry_cv_gcc_inline_asm_gfni=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("gf2p8affineqb \$123, %%xmm0, %%xmm0;\n\t":::"cc"); /* SSE */ __asm__("vgf2p8affineinvqb \$234, %%ymm1, %%ymm1, %%ymm1;\n\t":::"cc"); /* AVX */ __asm__("vgf2p8mulb (%%eax), %%zmm2, %%zmm2;\n\t":::"cc"); /* AVX512 */ }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_gfni=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_gfni" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_GFNI,1, [Defined if inline assembler supports GFNI instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else gcry_cv_gcc_inline_asm_bmi2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[unsigned int a(unsigned int x, unsigned int y) { unsigned int tmp1, tmp2; asm ("rorxl %2, %1, %0" : "=r" (tmp1) : "rm0" (x), "J" (32 - ((23) & 31))); asm ("andnl %2, %1, %0" : "=r" (tmp2) : "r0" (x), "rm" (y)); return tmp1 + tmp2; }]], [ a(1, 2); ] )], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");]], [fn();])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to CPPFLAGS and try check again. # _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");]], [fn();])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # # Check whether GCC assembler supports features needed for our amd64 # implementations # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], [gcry_cv_gcc_amd64_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_amd64_platform_as_ok="n/a" else gcry_cv_gcc_amd64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. */ "xorl \$(123456789/12345678), %ebp;\n\t" );]], [ asmfunc(); ])], [gcry_cv_gcc_amd64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" );]], [ asmfunc(); ])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".intel_syntax noprefix\n\t" ".text\n\t" "actest:\n\t" "pxor xmm1, xmm7;\n\t" "vperm2i128 ymm2, ymm3, ymm0, 1;\n\t" "add eax, ebp;\n\t" "rorx eax, ebp, 1;\n\t" "sub eax, [esp + 4];\n\t" "add dword ptr [esp + eax], 0b10101;\n\t" ".att_syntax prefix\n\t" );]], [ actest(); ])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. */ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" ".text\n\t" "testfn:\n\t" "vld1.64 {%q0-%q1}, [%r0]!;\n\t" "vrev64.8 %q0, %q3;\n\t" "vadd.u64 %q0, %q1;\n\t" "vadd.s64 %d3, %d2, %d3;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" ".text\n\t" "testfn:\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], [gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd+crypto\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension instructions]) fi # # Check whether PowerPC AltiVec/VSX intrinsics # AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics], [gcry_cv_cc_ppc_altivec], [if test "$mpi_cpu_arch" != "ppc" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_ppc_altivec="n/a" else gcry_cv_cc_ppc_altivec=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include typedef vector unsigned char block; typedef vector unsigned int vecu32; static inline __attribute__((always_inline)) vecu32 vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx) { return vec_sld (a, b, (4 * idx) & 15); } block fn(block in) { block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); vecu32 y = vec_vsx_ld (0, (unsigned int*)0); y = vec_sld_u32 (y, y, 3); return vec_cipher_be (t, in) ^ (block)y; } ]])], [gcry_cv_cc_ppc_altivec=yes]) fi]) if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) fi _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -O2 -maltivec -mvsx -mcrypto" if test "$gcry_cv_cc_ppc_altivec" = "no" && test "$mpi_cpu_arch" = "ppc" && test "$try_asm_modules" == "yes" ; then AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags], [gcry_cv_cc_ppc_altivec_cflags], [gcry_cv_cc_ppc_altivec_cflags=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include typedef vector unsigned char block; typedef vector unsigned int vecu32; static inline __attribute__((always_inline)) vecu32 vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx) { return vec_sld (a, b, (4 * idx) & 15); } block fn(block in) { block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); vecu32 y = vec_vsx_ld (0, (unsigned int*)0); y = vec_sld_u32 (y, y, 3); return vec_cipher_be (t, in) ^ (block)y; }]])], [gcry_cv_cc_ppc_altivec_cflags=yes])]) if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics]) AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1, [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags]) fi fi AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS, test "$gcry_cv_cc_ppc_altivec_cflags" = "yes") # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions # AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions], [gcry_cv_gcc_inline_asm_ppc_altivec], [if test "$mpi_cpu_arch" != "ppc" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_ppc_altivec="n/a" else gcry_cv_gcc_inline_asm_ppc_altivec=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".globl testfn;\n" ".text\n\t" "testfn:\n" "stvx %v31,%r12,%r0;\n" "lvx %v20,%r12,%r0;\n" "vcipher %v0, %v1, %v22;\n" "lxvw4x %vs32, %r0, %r1;\n" "vadduwm %v0, %v1, %v22;\n" "vshasigmaw %v0, %v1, 0, 15;\n" "vshasigmad %v0, %v1, 0, 15;\n" "vpmsumd %v11, %v11, %v11;\n" ); ]], [ testfn(); ] )], [gcry_cv_gcc_inline_asm_ppc_altivec=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1, [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions]) fi # # Check whether GCC inline assembler supports PowerISA 3.00 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions], [gcry_cv_gcc_inline_asm_ppc_arch_3_00], [if test "$mpi_cpu_arch" != "ppc" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a" else gcry_cv_gcc_inline_asm_ppc_arch_3_00=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\t" ".globl testfn;\n" "testfn:\n" "stxvb16x %r1,%v12,%v30;\n" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1, [Defined if inline assembler supports PowerISA 3.00 instructions]) fi # # Check whether GCC inline assembler supports zSeries instructions # AC_CACHE_CHECK([whether GCC inline assembler supports zSeries instructions], [gcry_cv_gcc_inline_asm_s390x], [if test "$mpi_cpu_arch" != "s390x" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_s390x="n/a" else gcry_cv_gcc_inline_asm_s390x=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[typedef unsigned int u128_t __attribute__ ((mode (TI))); unsigned int testfunc(unsigned int x, void *y, unsigned int z) { unsigned long fac[8]; register unsigned long reg0 asm("0") = 0; register unsigned long reg1 asm("1") = x; u128_t r1 = ((u128_t)(unsigned long)y << 64) | (unsigned long)z; u128_t r2 = 0; u128_t r3 = 0; asm volatile (".insn rre,0xb92e << 16, %[r1], %[r2]\n\t" : [r1] "+a" (r1), [r2] "+a" (r2) : "r" (reg0), "r" (reg1) : "cc", "memory"); asm volatile (".insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t" : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3) : "r" (reg0), "r" (reg1) : "cc", "memory"); reg0 = 8 - 1; asm ("stfle %1\n\t" : "+d" (reg0), "=Q" (fac[0]) : : "cc", "memory"); asm volatile ("mvc 0(16, %0), 0(%1)\n\t" : : "a" (y), "a" (fac) : "memory"); asm volatile ("xc 0(16, %0), 0(%0)\n\t" : : "a" (fac) : "memory"); asm volatile ("risbgn %%r11, %%r11, 0, 129, 0\n\t" : : : "memory", "r11"); asm volatile ("algrk %%r14, %%r14, %%r14\n\t" : : : "memory", "r14"); return (unsigned int)r1 ^ reg0; } ]] , [ testfunc(0, 0, 0); ])], [gcry_cv_gcc_inline_asm_s390x=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X,1, [Defined if inline assembler supports zSeries instructions]) fi # # Check whether GCC inline assembler supports zSeries vector instructions # AC_CACHE_CHECK([whether GCC inline assembler supports zSeries vector instructions], [gcry_cv_gcc_inline_asm_s390x_vx], [if test "$mpi_cpu_arch" != "s390x" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_s390x_vx="n/a" else gcry_cv_gcc_inline_asm_s390x_vx=no if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void testfunc(void) { asm volatile (".machine \"z13+vx\"\n\t" "vx %%v0, %%v1, %%v31\n\t" "verllf %%v11, %%v11, (16)(0)\n\t" : : : "memory"); } ]], [ testfunc(); ])], [gcry_cv_gcc_inline_asm_s390x_vx=yes]) fi fi]) if test "$gcry_cv_gcc_inline_asm_s390x_vx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X_VX,1, [Defined if inline assembler supports zSeries vector instructions]) fi ####################################### #### Checks for library functions. #### ####################################### AC_FUNC_VPRINTF # We have replacements for these in src/missing-string.c AC_CHECK_FUNCS(stpcpy strcasecmp) # We have replacements for these in src/g10lib.h AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise) # Other checks AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4) AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog) AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile getauxval elf_aux_info) AC_CHECK_FUNCS(explicit_bzero explicit_memset getentropy) GNUPG_CHECK_MLOCK # # Replacement functions. # AC_REPLACE_FUNCS([getpid clock]) # # Check whether it is necessary to link against libdl. # DL_LIBS="" if test "$use_hmac_binary_check" != no ; then _gcry_save_libs="$LIBS" LIBS="" AC_SEARCH_LIBS(dlopen, c dl,,,) DL_LIBS=$LIBS LIBS="$_gcry_save_libs" fi AC_SUBST(DL_LIBS) # # Check whether we can use Linux capabilities as requested. # if test "$use_capabilities" = "yes" ; then use_capabilities=no AC_CHECK_HEADERS(sys/capability.h) if test "$ac_cv_header_sys_capability_h" = "yes" ; then AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1) if test "$ac_cv_lib_cap_cap_init" = "yes"; then AC_DEFINE(USE_CAPABILITIES,1, [define if capabilities should be used]) LIBS="$LIBS -lcap" use_capabilities=yes fi fi if test "$use_capabilities" = "no" ; then AC_MSG_WARN([[ *** *** The use of capabilities on this system is not possible. *** You need a recent Linux kernel and some patches: *** fcaps-2.2.9-990610.patch (kernel patch for 2.2.9) *** fcap-module-990613.tar.gz (kernel module) *** libcap-1.92.tar.gz (user mode library and utilities) *** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN *** set (filesystems menu). Be warned: This code is *really* ALPHA. ***]]) fi fi # Check whether a random device is available. if test "$try_dev_random" = yes ; then AC_CACHE_CHECK(for random device, ac_cv_have_dev_random, [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi]) if test "$ac_cv_have_dev_random" = yes; then AC_DEFINE(HAVE_DEV_RANDOM,1, [defined if the system supports a random device] ) fi else AC_MSG_CHECKING(for random device) ac_cv_have_dev_random=no AC_MSG_RESULT(has been disabled) fi # Figure out the random modules for this configuration. if test "$random" = "default"; then # Select default value. if test "$ac_cv_func_getentropy" = yes; then random_modules="getentropy" elif test "$ac_cv_have_dev_random" = yes; then # Try Linuxish random device. random_modules="linux" else case "${host}" in *-*-mingw32ce*) # WindowsCE random device. random_modules="w32ce" ;; *-*-mingw32*|*-*-cygwin*) # Windows random device. random_modules="w32" ;; *) # Build everything, allow to select at runtime. random_modules="$auto_random_modules" ;; esac fi else if test "$random" = "auto"; then # Build everything, allow to select at runtime. random_modules="$auto_random_modules" else random_modules="$random" fi fi # # Other defines # if test mym4_isgit = "yes"; then AC_DEFINE(IS_DEVELOPMENT_VERSION,1, [Defined if this is not a regular release]) fi AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes) # This is handy for debugging so the compiler doesn't rearrange # things and eliminate variables. AC_ARG_ENABLE(optimization, AS_HELP_STRING([--disable-optimization], [disable compiler optimization]), [if test $enableval = no ; then CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'` fi]) AC_MSG_NOTICE([checking for cc features]) # CFLAGS mangling when using gcc. if test "$GCC" = yes; then AC_MSG_CHECKING([if gcc supports -fno-delete-null-pointer-checks]) _gcc_cflags_save=$CFLAGS CFLAGS="-fno-delete-null-pointer-checks" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -fno-delete-null-pointer-checks" fi CFLAGS="$CFLAGS -Wall" if test "$USE_MAINTAINER_MODE" = "yes"; then CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes" CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security" # If -Wno-missing-field-initializers is supported we can enable a # a bunch of really useful warnings. AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wno-missing-field-initializers" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wdeclaration-after-statement" CFLAGS="$CFLAGS -Wno-missing-field-initializers" CFLAGS="$CFLAGS -Wno-sign-compare" fi AC_MSG_CHECKING([if gcc supports -Wpointer-arith]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wpointer-arith" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -Wpointer-arith" fi fi fi # Check whether as(1) supports a noeexecstack feature. This test # includes an override option. CL_AS_NOEXECSTACK AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION) AC_SUBST(LIBGCRYPT_CONFIG_LIBS) AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS) AC_SUBST(LIBGCRYPT_CONFIG_HOST) AC_SUBST(LIBGCRYPT_THREAD_MODULES) AC_CONFIG_COMMANDS([gcrypt-conf],[[ chmod +x src/libgcrypt-config ]],[[ prefix=$prefix exec_prefix=$exec_prefix libdir=$libdir datadir=$datadir DATADIRNAME=$DATADIRNAME ]]) ##################### #### Conclusion. #### ##################### # Check that requested feature can actually be used and define # ENABLE_foo_SUPPORT macros. if test x"$aesnisupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then aesnisupport="no (unsupported by compiler)" fi fi if test x"$shaextsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then shaextsupport="no (unsupported by compiler)" fi fi if test x"$pclmulsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then pclmulsupport="no (unsupported by compiler)" fi fi if test x"$sse41support" = xyes ; then if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then sse41support="no (unsupported by compiler)" fi fi if test x"$avxsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then avxsupport="no (unsupported by compiler)" fi fi if test x"$avx2support" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then avx2support="no (unsupported by compiler)" fi fi if test x"$avx512support" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx512" != "yes" ; then avx512support="no (unsupported by compiler)" fi fi if test x"$gfnisupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_gfni" != "yes" ; then gfnisupport="no (unsupported by compiler)" fi fi if test x"$neonsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then neonsupport="no (unsupported by compiler)" fi fi fi if test x"$armcryptosupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then armcryptosupport="no (unsupported by compiler)" fi fi fi if test x"$aesnisupport" = xyes ; then AC_DEFINE(ENABLE_AESNI_SUPPORT, 1, [Enable support for Intel AES-NI instructions.]) fi if test x"$shaextsupport" = xyes ; then AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1, [Enable support for Intel SHAEXT instructions.]) fi if test x"$pclmulsupport" = xyes ; then AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1, [Enable support for Intel PCLMUL instructions.]) fi if test x"$sse41support" = xyes ; then AC_DEFINE(ENABLE_SSE41_SUPPORT, 1, [Enable support for Intel SSE4.1 instructions.]) fi if test x"$avxsupport" = xyes ; then AC_DEFINE(ENABLE_AVX_SUPPORT,1, [Enable support for Intel AVX instructions.]) fi if test x"$avx2support" = xyes ; then AC_DEFINE(ENABLE_AVX2_SUPPORT,1, [Enable support for Intel AVX2 instructions.]) fi if test x"$avx512support" = xyes ; then AC_DEFINE(ENABLE_AVX512_SUPPORT,1, [Enable support for Intel AVX512 instructions.]) fi if test x"$gfnisupport" = xyes ; then AC_DEFINE(ENABLE_GFNI_SUPPORT,1, [Enable support for Intel GFNI instructions.]) fi if test x"$neonsupport" = xyes ; then AC_DEFINE(ENABLE_NEON_SUPPORT,1, [Enable support for ARM NEON instructions.]) fi if test x"$armcryptosupport" = xyes ; then AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi if test x"$ppccryptosupport" = xyes ; then AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1, [Enable support for POWER 8 (PowerISA 2.07) crypto extension.]) fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) fi if test x"$padlocksupport" = xyes ; then AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1, [Enable support for the PadLock engine.]) fi if test x"$drngsupport" = xyes ; then AC_DEFINE(ENABLE_DRNG_SUPPORT, 1, [Enable support for Intel DRNG (RDRAND instruction).]) fi if test x"$force_soft_hwfeatures" = xyes ; then AC_DEFINE(ENABLE_FORCE_SOFT_HWFEATURES, 1, [Enable forcing 'soft' HW feature bits on (for testing).]) fi # Define conditional sources and config.h symbols depending on the # selected ciphers, pubkey-ciphers, digests, kdfs, and random modules. LIST_MEMBER(arcfour, $enabled_ciphers) if test "$found" = "1"; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo" AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS arcfour-amd64.lo" ;; esac fi LIST_MEMBER(blowfish, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo" AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-arm.lo" ;; esac fi LIST_MEMBER(cast5, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo" AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-arm.lo" ;; esac fi LIST_MEMBER(des, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo" AC_DEFINE(USE_DES, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS des-amd64.lo" ;; esac fi LIST_MEMBER(aes, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo" AC_DEFINE(USE_AES, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-amd64.lo" # Build with the SSSE3 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64-asm.lo" # Build with the VAES/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-arm.lo" # Build with the ARMv8/AArch32 CE implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aarch64.lo" # Build with the ARMv8/AArch64 CE implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc9le.lo" if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" && test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then # Build with AES-GCM bulk implementation for P10 GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-gcm-p10le.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-p10le.lo" fi ;; powerpc64-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo" ;; powerpc-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo" ;; s390x-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-s390x.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the AES-NI implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aesni.lo" # Build with the Padlock implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-padlock.lo" ;; esac fi LIST_MEMBER(twofish, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo" AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-amd64.lo" if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-avx2-amd64.lo" fi ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-aarch64.lo" ;; esac fi LIST_MEMBER(serpent, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo" AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the SSE2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-sse2-amd64.lo" ;; esac if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx2-amd64.lo" fi if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-armv7-neon.lo" fi fi LIST_MEMBER(rfc2268, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo" AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included]) fi LIST_MEMBER(seed, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo" AC_DEFINE(USE_SEED, 1, [Defined if this module should be included]) fi LIST_MEMBER(camellia, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo" AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included]) case "${host}" in arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64.lo" ;; esac if test x"$avxsupport" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx-amd64.lo" fi fi if test x"$avx2support" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx2-amd64.lo" # Build with the VAES/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-vaes-avx2-amd64.lo" # Build with the GFNI/AVX2 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-gfni-avx2-amd64.lo" # Build with the GFNI/AVX512 implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-gfni-avx512-amd64.lo" fi fi fi LIST_MEMBER(idea, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo" AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included]) fi LIST_MEMBER(salsa20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo" AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-amd64.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-armv7-neon.lo" fi fi LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo" AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included]) fi LIST_MEMBER(chacha20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo" AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-ssse3.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx2.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx512.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-aarch64.lo" ;; powerpc64le-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" + # Build with the assembly implementation + if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" && + test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then + GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-p10le-8x.lo" + fi ;; powerpc64-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" ;; powerpc-*-*) # Build with the ppc8 vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo" ;; s390x-*-*) # Build with the s390x/zSeries vector implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-s390x.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-armv7-neon.lo" fi fi LIST_MEMBER(sm4, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS sm4.lo" AC_DEFINE(USE_SM4, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx2-amd64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-gfni-avx2-amd64.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aarch64.lo" GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-armv8-aarch64-ce.lo" esac fi LIST_MEMBER(dsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo" AC_DEFINE(USE_DSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(rsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo" AC_DEFINE(USE_RSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(elgamal, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo" AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included]) fi LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \ ecc-sm2.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi LIST_MEMBER(crc, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo" AC_DEFINE(USE_CRC, 1, [Defined if this module should be included]) case "${host}" in i?86-*-* | x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-intel-pclmul.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-ce.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; powerpc64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; powerpc-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo" ;; esac fi LIST_MEMBER(gostr3411-94, $enabled_digests) if test "$found" = "1" ; then # GOST R 34.11-94 internally uses GOST 28147-89 LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo" AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included]) fi fi LIST_MEMBER(stribog, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo" AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included]) fi LIST_MEMBER(md2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo" AC_DEFINE(USE_MD2, 1, [Defined if this module should be included]) fi LIST_MEMBER(md4, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo" AC_DEFINE(USE_MD4, 1, [Defined if this module should be included]) fi LIST_MEMBER(md5, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo" AC_DEFINE(USE_MD5, 1, [Defined if this module should be included]) fi LIST_MEMBER(rmd160, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo" AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included]) fi LIST_MEMBER(sha256, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo" AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch64-ce.lo" ;; powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" ;; powerpc64-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" ;; powerpc-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo" esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-intel-shaext.lo" ;; esac fi LIST_MEMBER(sha512, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo" AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx2-bmi2-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx512-amd64.lo" ;; i?86-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-i386.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo" ;; powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" ;; powerpc64-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" ;; powerpc-*-*) # Big-Endian. # Build with the crypto extension implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo" esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv7-neon.lo" fi fi LIST_MEMBER(sha3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo" AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation : ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-armv7-neon.lo" fi fi LIST_MEMBER(tiger, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo" AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included]) fi LIST_MEMBER(whirlpool, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo" AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS whirlpool-sse2-amd64.lo" ;; esac fi LIST_MEMBER(blake2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo" AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx2.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx.lo" ;; esac fi LIST_MEMBER(sm3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo" AC_DEFINE(USE_SM3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-aarch64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-armv8-aarch64-ce.lo" ;; esac fi # SHA-1 needs to be included always for example because it is used by # random-csprng.c. GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo" AC_DEFINE(USE_SHA1, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-ssse3-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-bmi2-amd64.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv7-neon.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-intel-shaext.lo" ;; esac # Arch specific GCM implementations case "${host}" in i?86-*-* | x86_64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-intel-pclmul.lo" ;; arm*-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv7-neon.lo" GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch32-ce.lo" ;; aarch64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch64-ce.lo" ;; powerpc64le-*-* | powerpc64-*-* | powerpc-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo" ;; esac # Arch specific MAC implementations case "${host}" in s390x-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-s390x.lo" ;; x86_64-*-*) GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-amd64-avx512.lo" ;; + powerpc64le-*-*) + # Build with the assembly implementation + if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" && + test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then + GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-p10le.lo" + fi + ;; esac LIST_MEMBER(scrypt, $enabled_kdfs) if test "$found" = "1" ; then GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo" AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included]) fi LIST_MEMBER(getentropy, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndgetentropy.lo" AC_DEFINE(USE_RNDGETENTROPY, 1, [Defined if the getentropy RNG should be used.]) fi LIST_MEMBER(linux, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndoldlinux.lo" AC_DEFINE(USE_RNDOLDLINUX, 1, [Defined if the /dev/random RNG should be used.]) fi LIST_MEMBER(unix, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo" AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.]) fi LIST_MEMBER(egd, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo" AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.]) fi LIST_MEMBER(w32, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo" AC_DEFINE(USE_RNDW32, 1, [Defined if the Windows specific RNG should be used.]) fi LIST_MEMBER(w32ce, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo" AC_DEFINE(USE_RNDW32CE, 1, [Defined if the WindowsCE specific RNG should be used.]) fi if test "$try_asm_modules" = yes ; then # Build with assembly implementations GCRYPT_CIPHERS="$GCRYPT_CIPHERS $GCRYPT_ASM_CIPHERS" GCRYPT_DIGESTS="$GCRYPT_DIGESTS $GCRYPT_ASM_DIGESTS" fi AC_SUBST([GCRYPT_CIPHERS]) AC_SUBST([GCRYPT_PUBKEY_CIPHERS]) AC_SUBST([GCRYPT_DIGESTS]) AC_SUBST([GCRYPT_KDFS]) AC_SUBST([GCRYPT_RANDOM]) AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers) AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers) AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests) # For printing the configuration we need a colon separated list of # algorithm names. tmp=`echo "$enabled_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp", [List of available cipher algorithms]) tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp", [List of available public key cipher algorithms]) tmp=`echo "$enabled_digests" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp", [List of available digest algorithms]) tmp=`echo "$enabled_kdfs" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp", [List of available KDF algorithms]) # # Define conditional sources depending on the used hardware platform. # Note that all possible modules must also be listed in # src/Makefile.am (EXTRA_libgcrypt_la_SOURCES). # GCRYPT_HWF_MODULES= case "$mpi_cpu_arch" in x86) AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo" ;; alpha) AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms]) ;; sparc) AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms]) ;; mips) AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms]) ;; m68k) AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms]) ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; aarch64) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; s390x) AC_DEFINE(HAVE_CPU_ARCH_S390X, 1, [Defined for s390x/zSeries platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-s390x.lo" ;; esac AC_SUBST([GCRYPT_HWF_MODULES]) # # Option to disable building of doc file # build_doc=yes AC_ARG_ENABLE([doc], AS_HELP_STRING([--disable-doc], [do not build the documentation]), build_doc=$enableval, build_doc=yes) AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno]) # # Provide information about the build. # BUILD_REVISION="mym4_revision" AC_SUBST(BUILD_REVISION) AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION", [GIT commit id revision used to build this package]) changequote(,)dnl BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'` changequote([,])dnl BUILD_VERSION="${BUILD_VERSION}mym4_revision_dec" BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,` AC_SUBST(BUILD_VERSION) AC_SUBST(BUILD_FILEVERSION) AC_ARG_ENABLE([build-timestamp], AS_HELP_STRING([--enable-build-timestamp], [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]), [if test "$enableval" = "yes"; then BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date` else BUILD_TIMESTAMP="$enableval" fi], [BUILD_TIMESTAMP=""]) AC_SUBST(BUILD_TIMESTAMP) AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP", [The time this package was configured for a build]) # And create the files. AC_CONFIG_FILES([ Makefile m4/Makefile compat/Makefile mpi/Makefile cipher/Makefile random/Makefile doc/Makefile src/Makefile src/gcrypt.h src/libgcrypt-config src/libgcrypt.pc src/versioninfo.rc tests/Makefile ]) AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g]) AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf]) AC_OUTPUT detection_module="${GCRYPT_HWF_MODULES%.lo}" test -n "$detection_module" || detection_module="none" # Give some feedback GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:]) GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)]) GCRY_MSG_SHOW([Hardware detection module:],[$detection_module]) GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers]) GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests]) GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs]) GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers]) GCRY_MSG_SHOW([Random number generator: ],[$random]) GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport]) GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities]) GCRY_MSG_SHOW([FIPS module version: ],[$fips_module_version]) GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport]) GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport]) GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport]) GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport]) GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support]) GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport]) GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using Intel AVX512: ],[$avx512support]) GCRY_MSG_SHOW([Try using Intel GFNI: ],[$gfnisupport]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then cat <