diff --git a/LICENSES b/LICENSES index 94499501..67b80e64 100644 --- a/LICENSES +++ b/LICENSES @@ -1,288 +1,318 @@ Additional license notices for Libgcrypt. -*- org -*- This file contains the copying permission notices for various files in the Libgcrypt distribution which are not covered by the GNU Lesser General Public License (LGPL) or the GNU General Public License (GPL). These notices all require that a copy of the notice be included in the accompanying documentation and be distributed with binary distributions of the code, so be sure to include this file along with any binary distributions derived from the GNU C Library. * BSD_3Clause For files: - cipher/sha256-avx-amd64.S - cipher/sha256-avx2-bmi2-amd64.S - cipher/sha256-ssse3-amd64.S - cipher/sha512-avx-amd64.S - cipher/sha512-avx2-bmi2-amd64.S - cipher/sha512-ssse3-amd64.S - cipher/sha512-ssse3-i386.c - cipher/sha512-avx512-amd64.S #+begin_quote Copyright (c) 2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL CORPORATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #+end_quote + For files: + - cipher/poly1305-amd64-avx512.S + +#+begin_quote + Copyright (c) 2021-2022, Intel Corporation + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#+end_quote + For files: - random/jitterentropy-base.c - random/jitterentropy-gcd.c - random/jitterentropy-gcd.h - random/jitterentropy-health.c - random/jitterentropy-health.h - random/jitterentropy-noise.c - random/jitterentropy-noise.h - random/jitterentropy-sha3.c - random/jitterentropy-sha3.h - random/jitterentropy-timer.c - random/jitterentropy-timer.h - random/jitterentropy.h - random/rndjent.c (plus common Libgcrypt copyright holders) #+begin_quote Copyright (C) 2017 - 2021, Stephan Mueller Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, and the entire permission notice in its entirety, including the disclaimer of warranties. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. ALTERNATIVELY, this product may be distributed under the terms of the GNU General Public License, in which case the provisions of the GPL2 are required INSTEAD OF the above restrictions. 
(This clause is necessary due to a potential bad interaction between the GPL and the restrictions contained in a BSD-style copyright.) THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #+end_quote For files: - cipher/cipher-gcm-ppc.c #+begin_quote Copyright (c) 2006, CRYPTOGAMS by All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain copyright notices, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the CRYPTOGAMS nor the names of its copyright holder and contributors may be used to endorse or promote products derived from this software without specific prior written permission. ALTERNATIVELY, provided that this notice is retained in full, this product may be distributed under the terms of the GNU General Public License (GPL), in which case the provisions of the GPL apply INSTEAD OF those given above. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #+end_quote * X License For files: - install.sh #+begin_quote Copyright (C) 1994 X Consortium Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Except as contained in this notice, the name of the X Consortium shall not be used in advertising or otherwise to promote the sale, use or other deal- ings in this Software without prior written authorization from the X Consor- tium. #+end_quote * Public domain For files: - cipher/arcfour-amd64.S #+begin_quote Author: Marc Bevand Licence: I hereby disclaim the copyright on this code and place it in the public domain. #+end_quote * OCB license 1 For files: - cipher/cipher-ocb.c #+begin_quote OCB is covered by several patents but may be used freely by most software. See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm . In particular license 1 is suitable for Libgcrypt: See http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full license document; it basically says: License 1 — License for Open-Source Software Implementations of OCB (Jan 9, 2013) Under this license, you are authorized to make, use, and distribute open-source software implementations of OCB. This license terminates for you if you sue someone over their open-source software implementation of OCB claiming that you have a patent covering their implementation. License for Open Source Software Implementations of OCB January 9, 2013 1 Definitions 1.1 “Licensor” means Phillip Rogaway. 1.2 “Licensed Patents” means any patent that claims priority to United States Patent Application No. 09/918,615 entitled “Method and Apparatus for Facilitating Efficient Authenticated Encryption,” and any utility, divisional, provisional, continuation, continuations-in-part, reexamination, reissue, or foreign counterpart patents that may issue with respect to the aforesaid patent application. This includes, but is not limited to, United States Patent No. 7,046,802; United States Patent No. 7,200,227; United States Patent No. 7,949,129; United States Patent No. 8,321,675 ; and any patent that issues out of United States Patent Application No. 13/669,114. 
1.3 “Use” means any practice of any invention claimed in the Licensed Patents. 1.4 “Software Implementation” means any practice of any invention claimed in the Licensed Patents that takes the form of software executing on a user-programmable, general-purpose computer or that takes the form of a computer-readable medium storing such software. Software Implementation does not include, for example, application-specific integrated circuits (ASICs), field-programmable gate arrays (FPGAs), embedded systems, or IP cores. 1.5 “Open Source Software” means software whose source code is published and made available for inspection and use by anyone because either (a) the source code is subject to a license that permits recipients to copy, modify, and distribute the source code without payment of fees or royalties, or (b) the source code is in the public domain, including code released for public use through a CC0 waiver. All licenses certified by the Open Source Initiative at opensource.org as of January 9, 2013 and all Creative Commons licenses identified on the creativecommons.org website as of January 9, 2013, including the Public License Fallback of the CC0 waiver, satisfy these requirements for the purposes of this license. 1.6 “Open Source Software Implementation” means a Software Implementation in which the software implicating the Licensed Patents is Open Source Software. Open Source Software Implementation does not include any Software Implementation in which the software implicating the Licensed Patents is combined, so as to form a larger program, with software that is not Open Source Software. 2 License Grant 2.1 License. 
Subject to your compliance with the term s of this license, including the restriction set forth in Section 2.2, Licensor hereby grants to you a perpetual, worldwide, non-exclusive, non-transferable, non-sublicenseable, no-charge, royalty-free, irrevocable license to practice any invention claimed in the Licensed Patents in any Open Source Software Implementation. 2.2 Restriction. If you or your affiliates institute patent litigation (including, but not limited to, a cross-claim or counterclaim in a lawsuit) against any entity alleging that any Use authorized by this license infringes another patent, then any rights granted to you under this license automatically terminate as of the date such litigation is filed. 3 Disclaimer YOUR USE OF THE LICENSED PATENTS IS AT YOUR OWN RISK AND UNLESS REQUIRED BY APPLICABLE LAW, LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED PATENTS OR ANY PRODUCT EMBODYING ANY LICENSED PATENT, EXPRESS OR IMPLIED, STATUT ORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NONINFRINGEMENT. IN NO EVENT WILL LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM OR RELATED TO ANY USE OF THE LICENSED PATENTS, INCLUDING, WITHOUT LIMITATION, DIRECT, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR SPECIAL DAMAGES, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES PRIOR TO SUCH AN OCCURRENCE. #+end_quote diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 30be9f98..582205a3 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,277 +1,277 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. 
# # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box$(EXEEXT_FOR_BUILD) DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c \ cipher-cfb.c \ cipher-ofb.c \ cipher-ctr.c \ cipher-aeswrap.c \ cipher-ccm.c \ cipher-cmac.c \ cipher-gcm.c \ cipher-poly1305.c \ cipher-ocb.c \ cipher-xts.c \ cipher-eax.c \ cipher-siv.c \ cipher-gcm-siv.c \ cipher-selftest.c cipher-selftest.h \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ bithelp.h \ bufhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ asm-common-aarch64.h \ asm-common-amd64.h \ asm-common-s390x.h \ asm-inline-s390x.h \ asm-poly1305-aarch64.h \ asm-poly1305-amd64.h \ asm-poly1305-s390x.h \ 
arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ chacha20-armv7-neon.S chacha20-aarch64.S \ chacha20-ppc.c chacha20-s390x.S \ cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ crc.c crc-intel-pclmul.c crc-armv8-ce.c \ crc-armv8-aarch64-ce.S \ crc-ppc.c \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ - poly1305-s390x.S \ + poly1305-s390x.S poly1305-amd64-avx512.S \ rijndael.c rijndael-internal.h rijndael-tables.h \ rijndael-aesni.c rijndael-padlock.c \ rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-vaes.c rijndael-vaes-avx2-amd64.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ rijndael-ppc.c rijndael-ppc9le.c \ rijndael-p10le.c rijndael-gcm-p10le.s \ rijndael-ppc-common.h rijndael-ppc-functions.h \ rijndael-s390x.c \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S \ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S sm4-aarch64.S \ sm4-armv8-aarch64-ce.S \ serpent-avx2-amd64.S serpent-armv7-neon.S \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha256-intel-shaext.c sha256-ppc.c \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \ sha512-avx2-bmi2-amd64.S sha512-avx512-amd64.S \ sha512-armv7-neon.S sha512-arm.S \ sha512-ppc.c sha512-ssse3-i386.c \ sm3.c sm3-avx-bmi2-amd64.S 
sm3-aarch64.S sm3-armv8-aarch64-ce.S \ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.h camellia-vaes-avx2-amd64.S \ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ blake2.c \ blake2b-amd64-avx2.S blake2s-amd64-avx.S gost28147.lo: gost-sb.h gost-sb.h: gost-s-box$(EXEEXT_FOR_BUILD) ./gost-s-box$(EXEEXT_FOR_BUILD) $@ gost-s-box$(EXEEXT_FOR_BUILD): gost-s-box.c $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. tiger.o: $(srcdir)/tiger.c Makefile `echo $(COMPILE) -c $< | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c Makefile `echo $(LTCOMPILE) -c $< | $(o_flag_munging) ` # We need to disable instrumentation for these modules as they use cc as # thin assembly front-end and do not tolerate in-between function calls # inserted by compiler as those functions may clobber the XMM registers. 
if ENABLE_INSTRUMENTATION_MUNGING instrumentation_munging = sed \ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' else instrumentation_munging = cat endif rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(COMPILE) -c $< | $(instrumentation_munging) ` crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS ppc_vcrypto_cflags = -O2 -maltivec -mvsx -mcrypto else 
ppc_vcrypto_cflags = endif rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.o: $(srcdir)/rijndael-p10le.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` rijndael-p10le.lo: $(srcdir)/rijndael-p10le.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.o: $(srcdir)/crc-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.o: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` cipher-gcm-ppc.lo: $(srcdir)/cipher-gcm-ppc.c Makefile `echo $(LTCOMPILE) 
$(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` diff --git a/cipher/poly1305-amd64-avx512.S b/cipher/poly1305-amd64-avx512.S new file mode 100644 index 00000000..48892777 --- /dev/null +++ b/cipher/poly1305-amd64-avx512.S @@ -0,0 +1,1625 @@ +/* +;; +;; Copyright (c) 2021-2022, Intel Corporation +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are met: +;; +;; * Redistributions of source code must retain the above copyright notice, +;; this list of conditions and the following disclaimer. +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; * Neither the name of Intel Corporation nor the names of its contributors +;; may be used to endorse or promote products derived from this software +;; without specific prior written permission. +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;; +*/ +/* + * From: + * https://github.com/intel/intel-ipsec-mb/blob/f0cad21a644231c0f5d4af51f56061a5796343fb/lib/avx512/poly_fma_avx512.asm + * + * Conversion to GAS assembly and integration to libgcrypt + * by Jussi Kivilinna + */ + +#ifdef __x86_64 +#include +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AVX512) +#include "asm-common-amd64.h" + +.intel_syntax noprefix + +.text + +ELF(.type _gcry_poly1305_avx512_consts,@object) +_gcry_poly1305_avx512_consts: + +.align 64 +.Lmask_44: + .quad 0xfffffffffff, 0xfffffffffff, 0xfffffffffff, 0xfffffffffff + .quad 0xfffffffffff, 0xfffffffffff, 0xfffffffffff, 0xfffffffffff + +.align 64 +.Lmask_42: + .quad 0x3ffffffffff, 0x3ffffffffff, 0x3ffffffffff, 0x3ffffffffff + .quad 0x3ffffffffff, 0x3ffffffffff, 0x3ffffffffff, 0x3ffffffffff + +.align 64 +.Lhigh_bit: + .quad 0x10000000000, 0x10000000000, 0x10000000000, 0x10000000000 + .quad 0x10000000000, 0x10000000000, 0x10000000000, 0x10000000000 + +.Lbyte_len_to_mask_table: + .short 0x0000, 0x0001, 0x0003, 0x0007 + .short 0x000f, 0x001f, 0x003f, 0x007f + .short 0x00ff, 0x01ff, 0x03ff, 0x07ff + .short 0x0fff, 0x1fff, 0x3fff, 0x7fff + .short 0xffff + +.align 64 +.Lbyte64_len_to_mask_table: + .quad 0x0000000000000000, 0x0000000000000001 + .quad 0x0000000000000003, 0x0000000000000007 + .quad 0x000000000000000f, 0x000000000000001f + .quad 0x000000000000003f, 0x000000000000007f + .quad 0x00000000000000ff, 0x00000000000001ff + .quad 0x00000000000003ff, 0x00000000000007ff + .quad 0x0000000000000fff, 0x0000000000001fff + .quad 0x0000000000003fff, 0x0000000000007fff + .quad 0x000000000000ffff, 0x000000000001ffff + .quad 0x000000000003ffff, 0x000000000007ffff + .quad 0x00000000000fffff, 0x00000000001fffff + .quad 0x00000000003fffff, 0x00000000007fffff + .quad 0x0000000000ffffff, 0x0000000001ffffff + .quad 0x0000000003ffffff, 0x0000000007ffffff + .quad 
0x000000000fffffff, 0x000000001fffffff + .quad 0x000000003fffffff, 0x000000007fffffff + .quad 0x00000000ffffffff, 0x00000001ffffffff + .quad 0x00000003ffffffff, 0x00000007ffffffff + .quad 0x0000000fffffffff, 0x0000001fffffffff + .quad 0x0000003fffffffff, 0x0000007fffffffff + .quad 0x000000ffffffffff, 0x000001ffffffffff + .quad 0x000003ffffffffff, 0x000007ffffffffff + .quad 0x00000fffffffffff, 0x00001fffffffffff + .quad 0x00003fffffffffff, 0x00007fffffffffff + .quad 0x0000ffffffffffff, 0x0001ffffffffffff + .quad 0x0003ffffffffffff, 0x0007ffffffffffff + .quad 0x000fffffffffffff, 0x001fffffffffffff + .quad 0x003fffffffffffff, 0x007fffffffffffff + .quad 0x00ffffffffffffff, 0x01ffffffffffffff + .quad 0x03ffffffffffffff, 0x07ffffffffffffff + .quad 0x0fffffffffffffff, 0x1fffffffffffffff + .quad 0x3fffffffffffffff, 0x7fffffffffffffff + .quad 0xffffffffffffffff + +.Lqword_high_bit_mask: + .short 0, 0x1, 0x5, 0x15, 0x55, 0x57, 0x5f, 0x7f, 0xff + +ELF(.size _gcry_poly1305_avx512_consts,.-_gcry_poly1305_avx512_consts) + +#define raxd eax +#define rbxd ebx +#define rcxd ecx +#define rdxd edx +#define rsid esi +#define rdid edi +#define rbpd ebp +#define rspd esp +#define __DWORD(X) X##d +#define DWORD(R) __DWORD(R) + +#define arg1 rdi +#define arg2 rsi +#define arg3 rdx +#define arg4 rcx + +#define job arg1 +#define gp1 rsi +#define gp2 rcx + +/* ;; don't use rdx and rax - they are needed for multiply operation */ +#define gp3 rbp +#define gp4 r8 +#define gp5 r9 +#define gp6 r10 +#define gp7 r11 +#define gp8 r12 +#define gp9 r13 +#define gp10 r14 +#define gp11 r15 + +#define len gp11 +#define msg gp10 + +#define POLY1305_BLOCK_SIZE 16 + +#define STACK_r_save 0 +#define STACK_r_save_size (6 * 64) +#define STACK_gpr_save (STACK_r_save + STACK_r_save_size) +#define STACK_gpr_save_size (8 * 8) +#define STACK_rsp_save (STACK_gpr_save + STACK_gpr_save_size) +#define STACK_rsp_save_size (1 * 8) +#define STACK_SIZE (STACK_rsp_save + STACK_rsp_save_size) + +#define A2_ZERO(...) 
/**/ +#define A2_ZERO_INVERT(...) __VA_ARGS__ +#define A2_NOT_ZERO(...) __VA_ARGS__ +#define A2_NOT_ZERO_INVERT(...) /**/ + +#define clear_zmm(vec) vpxord vec, vec, vec + +/* +;; ============================================================================= +;; ============================================================================= +;; Computes hash for message length being multiple of block size +;; ============================================================================= +;; Combining 64-bit x 64-bit multiplication with reduction steps +;; +;; NOTES: +;; 1) A2 here is only two bits so anything above is subject of reduction. +;; Constant C1 = R1 + (R1 >> 2) simplifies multiply with less operations +;; 2) Magic 5x comes from mod 2^130-5 property and incorporating +;; reduction into multiply phase. +;; See "Cheating at modular arithmetic" and "Poly1305's prime: 2^130 - 5" +;; paragraphs at https://loup-vaillant.fr/tutorials/poly1305-design for more details. +;; +;; Flow of the code below is as follows: +;; +;; A2 A1 A0 +;; x R1 R0 +;; ----------------------------- +;; A2×R0 A1×R0 A0×R0 +;; + A0×R1 +;; + 5xA2xR1 5xA1xR1 +;; ----------------------------- +;; [0|L2L] [L1H|L1L] [L0H|L0L] +;; +;; Registers: T3:T2 T1:A0 +;; +;; Completing the multiply and adding (with carry) 3x128-bit limbs into +;; 192-bits again (3x64-bits): +;; A0 = L0L +;; A1 = L0H + L1L +;; T3 = L1H + L2L +; A0 [in/out] GPR with accumulator bits 63:0 +; A1 [in/out] GPR with accumulator bits 127:64 +; A2 [in/out] GPR with accumulator bits 195:128 +; R0 [in] GPR with R constant bits 63:0 +; R1 [in] GPR with R constant bits 127:64 +; C1 [in] C1 = R1 + (R1 >> 2) +; T1 [clobbered] GPR register +; T2 [clobbered] GPR register +; T3 [clobbered] GPR register +; GP_RAX [clobbered] RAX register +; GP_RDX [clobbered] RDX register +; IF_A2 [in] Used if input A2 is not 0 +*/ +#define POLY1305_MUL_REDUCE(A0, A1, A2, R0, R1, C1, T1, T2, T3, GP_RAX, GP_RDX, IF_A2) \ + /* T3:T2 = (A0 * R1) */ \ + mov GP_RAX, 
R1; \ + mul A0; \ + mov T2, GP_RAX; \ + mov GP_RAX, R0; \ + mov T3, GP_RDX; \ + \ + /* T1:A0 = (A0 * R0) */ \ + mul A0; \ + mov A0, GP_RAX; /* A0 not used in other operations */ \ + mov GP_RAX, R0; \ + mov T1, GP_RDX; \ + \ + /* T3:T2 += (A1 * R0) */ \ + mul A1; \ + add T2, GP_RAX; \ + mov GP_RAX, C1; \ + adc T3, GP_RDX; \ + \ + /* T1:A0 += (A1 * R1x5) */ \ + mul A1; \ + IF_A2(mov A1, A2); /* use A1 for A2 */ \ + add A0, GP_RAX; \ + adc T1, GP_RDX; \ + \ + /* NOTE: A2 is clamped to 2-bits, */ \ + /* R1/R0 is clamped to 60-bits, */ \ + /* their product is less than 2^64. */ \ + \ + IF_A2(/* T3:T2 += (A2 * R1x5) */); \ + IF_A2(imul A1, C1); \ + IF_A2(add T2, A1); \ + IF_A2(mov A1, T1); /* T1:A0 => A1:A0 */ \ + IF_A2(adc T3, 0); \ + \ + IF_A2(/* T3:A1 += (A2 * R0) */); \ + IF_A2(imul A2, R0); \ + IF_A2(add A1, T2); \ + IF_A2(adc T3, A2); \ + \ + IF_A2##_INVERT(/* If A2 == 0, just move and add T1-T2 to A1 */); \ + IF_A2##_INVERT(mov A1, T1); \ + IF_A2##_INVERT(add A1, T2); \ + IF_A2##_INVERT(adc T3, 0); \ + \ + /* At this point, 3 64-bit limbs are in T3:A1:A0 */ \ + /* T3 can span over more than 2 bits so final partial reduction step is needed. */ \ + \ + /* Partial reduction (just to fit into 130 bits) */ \ + /* A2 = T3 & 3 */ \ + /* k = (T3 & ~3) + (T3 >> 2) */ \ + /* Y x4 + Y x1 */ \ + /* A2:A1:A0 += k */ \ + \ + /* Result will be in A2:A1:A0 */ \ + mov T1, T3; \ + mov DWORD(A2), DWORD(T3); \ + and T1, ~3; \ + shr T3, 2; \ + and DWORD(A2), 3; \ + add T1, T3; \ + \ + /* A2:A1:A0 += k (kept in T1) */ \ + add A0, T1; \ + adc A1, 0; \ + adc DWORD(A2), 0 + +/* +;; ============================================================================= +;; ============================================================================= +;; Computes hash for 8 16-byte message blocks, +;; and adds new message blocks to accumulator. 
+;;
+;; It first multiplies all 8 blocks with powers of R:
+;;
+;;      a2      a1      a0
+;; x    b2      b1      b0
+;; ---------------------------------------
+;;     a2xb0   a1xb0   a0xb0
+;; +   a1xb1   a0xb1   5xa2xb1
+;; +   a0xb2   5xa2xb2 5xa1xb2
+;; ---------------------------------------
+;;        p2      p1      p0
+;;
+;; Then, it propagates the carry (higher bits after bit 43) from lower limbs into higher limbs,
+;; multiplying by 5 in case of the carry of p2.
+;;
+;A0 [in/out] ZMM register containing 1st 44-bit limb of the 8 blocks
+;A1 [in/out] ZMM register containing 2nd 44-bit limb of the 8 blocks
+;A2 [in/out] ZMM register containing 3rd 44-bit limb of the 8 blocks
+;R0 [in] ZMM register (R0) to include the 1st limb of R
+;R1 [in] ZMM register (R1) to include the 2nd limb of R
+;R2 [in] ZMM register (R2) to include the 3rd limb of R
+;R1P [in] ZMM register (R1') to include the 2nd limb of R (multiplied by 5)
+;R2P [in] ZMM register (R2') to include the 3rd limb of R (multiplied by 5)
+;P0_L [clobbered] ZMM register to contain low half of p[0] of the 8 blocks
+;P0_H [clobbered] ZMM register to contain high half of p[0] of the 8 blocks
+;P1_L [clobbered] ZMM register to contain low half of p[1] of the 8 blocks
+;P1_H [clobbered] ZMM register to contain high half of p[1] of the 8 blocks
+;P2_L [clobbered] ZMM register to contain low half of p[2] of the 8 blocks
+;P2_H [clobbered] ZMM register to contain high half of p[2] of the 8 blocks
+;ZTMP1 [clobbered] Temporary ZMM register
+*/
+#define POLY1305_MUL_REDUCE_VEC(A0, A1, A2, R0, R1, R2, R1P, R2P, P0_L, P0_H, \
+                                P1_L, P1_H, P2_L, P2_H, ZTMP1) \
+  /* ;; Reset accumulator */ \
+  vpxorq P0_L, P0_L, P0_L; \
+  vpxorq P0_H, P0_H, P0_H; \
+  vpxorq P1_L, P1_L, P1_L; \
+  vpxorq P1_H, P1_H, P1_H; \
+  vpxorq P2_L, P2_L, P2_L; \
+  vpxorq P2_H, P2_H, P2_H; \
+  \
+  /* ; Calculate products (IFMA accumulates into the just-cleared P registers) */ \
+  vpmadd52luq P0_L, A2, R1P; \
+  vpmadd52huq P0_H, A2, R1P; \
+  vpmadd52luq P1_L, A2, R2P; \
+  vpmadd52huq P1_H, A2, R2P; \
+  vpmadd52luq P2_L, A2, R0; \
+  vpmadd52huq P2_H, A2, R0; \
+  \
+  vpmadd52luq P1_L, A0, R1; \
+  vpmadd52huq P1_H, A0, R1; \
+  vpmadd52luq P2_L, A0, R2; \
+  vpmadd52huq P2_H, A0, R2; \
+  vpmadd52luq P0_L, A0, R0; \
+  vpmadd52huq P0_H, A0, R0; \
+  \
+  vpmadd52luq P0_L, A1, R2P; \
+  vpmadd52huq P0_H, A1, R2P; \
+  vpmadd52luq P1_L, A1, R0; \
+  vpmadd52huq P1_H, A1, R0; \
+  vpmadd52luq P2_L, A1, R1; \
+  vpmadd52huq P2_H, A1, R1; \
+  \
+  /* ; Carry propagation (first pass) */ \
+  vpsrlq ZTMP1, P0_L, 44; \
+  vpandq A0, P0_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpsllq P0_H, P0_H, 8; \
+  vpaddq P0_H, P0_H, ZTMP1; \
+  vpaddq P1_L, P1_L, P0_H; \
+  vpandq A1, P1_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpsrlq ZTMP1, P1_L, 44; \
+  vpsllq P1_H, P1_H, 8; \
+  vpaddq P1_H, P1_H, ZTMP1; \
+  vpaddq P2_L, P2_L, P1_H; \
+  vpandq A2, P2_L, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \
+  vpsrlq ZTMP1, P2_L, 42; \
+  vpsllq P2_H, P2_H, 10; \
+  vpaddq P2_H, P2_H, ZTMP1; \
+  \
+  /* ; Carry propagation (second pass) */ \
+  \
+  /* ; Multiply by 5 the highest bits (above 130 bits) */ \
+  vpaddq A0, A0, P2_H; \
+  vpsllq P2_H, P2_H, 2; \
+  vpaddq A0, A0, P2_H; \
+  vpsrlq ZTMP1, A0, 44; \
+  vpandq A0, A0, [.Lmask_44 ADD_RIP]; \
+  vpaddq A1, A1, ZTMP1;
+
+/*
+;; =============================================================================
+;; =============================================================================
+;; Computes hash for 16 16-byte message blocks,
+;; and adds new message blocks to accumulator,
+;; interleaving this computation with the loading and splatting
+;; of new data.
+;;
+;; It first multiplies all 16 blocks with powers of R (8 blocks from A0-A2
+;; and 8 blocks from B0-B2, multiplied by R0-R2)
+;;
+;;      a2      a1      a0
+;; x    b2      b1      b0
+;; ---------------------------------------
+;;     a2xb0   a1xb0   a0xb0
+;; +   a1xb1   a0xb1   5xa2xb1
+;; +   a0xb2   5xa2xb2 5xa1xb2
+;; ---------------------------------------
+;;        p2      p1      p0
+;;
+;; Then, it propagates the carry (higher bits after bit 43)
+;; from lower limbs into higher limbs,
+;; multiplying by 5 in case of the carry of p2, and adds
+;; the results to A0-A2 and B0-B2.
+;;
+;; =============================================================================
+;A0 [in/out] ZMM register containing 1st 44-bit limb of blocks 1-8
+;A1 [in/out] ZMM register containing 2nd 44-bit limb of blocks 1-8
+;A2 [in/out] ZMM register containing 3rd 44-bit limb of blocks 1-8
+;B0 [in/out] ZMM register containing 1st 44-bit limb of blocks 9-16
+;B1 [in/out] ZMM register containing 2nd 44-bit limb of blocks 9-16
+;B2 [in/out] ZMM register containing 3rd 44-bit limb of blocks 9-16
+;R0 [in] ZMM register (R0) to include the 1st limb of R
+;R1 [in] ZMM register (R1) to include the 2nd limb of R
+;R2 [in] ZMM register (R2) to include the 3rd limb of R
+;R1P [in] ZMM register (R1') to include the 2nd limb of R (multiplied by 5)
+;R2P [in] ZMM register (R2') to include the 3rd limb of R (multiplied by 5)
+;P0_L [clobbered] ZMM register to contain p[0] of the 8 blocks 1-8
+;P0_H [clobbered] ZMM register to contain p[0] of the 8 blocks 1-8
+;P1_L [clobbered] ZMM register to contain p[1] of the 8 blocks 1-8
+;P1_H [clobbered] ZMM register to contain p[1] of the 8 blocks 1-8
+;P2_L [clobbered] ZMM register to contain p[2] of the 8 blocks 1-8
+;P2_H [clobbered] ZMM register to contain p[2] of the 8 blocks 1-8
+;Q0_L [clobbered] ZMM register to contain p[0] of the 8 blocks 9-16
+;Q0_H [clobbered] ZMM register to contain p[0] of the 8 blocks 9-16
+;Q1_L [clobbered] ZMM register to contain p[1] of the 8 blocks 9-16
+;Q1_H [clobbered] ZMM register to contain p[1] of the 8 blocks 9-16
+;Q2_L [clobbered] ZMM register to contain p[2] of the 8 blocks 9-16
+;Q2_H [clobbered] ZMM register to contain p[2] of the 8 blocks 9-16
+;ZTMP1 [clobbered] Temporary ZMM register
+;ZTMP2 [clobbered] Temporary ZMM register
+;ZTMP3 [clobbered] Temporary ZMM register
+;ZTMP4 [clobbered] Temporary ZMM register
+;ZTMP5 [clobbered] Temporary ZMM register
+;ZTMP6 [clobbered] Temporary ZMM register
+;ZTMP7 [clobbered] Temporary ZMM register
+;ZTMP8 [clobbered] Temporary ZMM register
+;ZTMP9 [clobbered] Temporary ZMM register
+;MSG [in/out] Pointer to message
+;LEN [in/out] Length left of message
+*/
+#define POLY1305_MSG_MUL_REDUCE_VEC16(A0, A1, A2, B0, B1, B2, R0, R1, R2, R1P, \
+                                      R2P, P0_L, P0_H, P1_L, P1_H, P2_L, P2_H, \
+                                      Q0_L, Q0_H, Q1_L, Q1_H, Q2_L, Q2_H, \
+                                      ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, \
+                                      ZTMP6, ZTMP7, ZTMP8, ZTMP9, MSG, LEN) \
+  /* ;; Reset accumulator */ \
+  vpxorq P0_L, P0_L, P0_L; \
+  vpxorq P0_H, P0_H, P0_H; \
+  vpxorq P1_L, P1_L, P1_L; \
+  vpxorq P1_H, P1_H, P1_H; \
+  vpxorq P2_L, P2_L, P2_L; \
+  vpxorq P2_H, P2_H, P2_H; \
+  vpxorq Q0_L, Q0_L, Q0_L; \
+  vpxorq Q0_H, Q0_H, Q0_H; \
+  vpxorq Q1_L, Q1_L, Q1_L; \
+  vpxorq Q1_H, Q1_H, Q1_H; \
+  vpxorq Q2_L, Q2_L, Q2_L; \
+  vpxorq Q2_H, Q2_H, Q2_H; \
+  \
+  /* ;; This code interleaves hash computation with input loading/splatting */ \
+  \
+  /* ; Calculate products */ \
+  vpmadd52luq P0_L, A2, R1P; \
+  vpmadd52huq P0_H, A2, R1P; \
+  /* ;; input loading of new blocks */ \
+  add MSG, POLY1305_BLOCK_SIZE*16; \
+  sub LEN, POLY1305_BLOCK_SIZE*16; \
+  \
+  vpmadd52luq Q0_L, B2, R1P; \
+  vpmadd52huq Q0_H, B2, R1P; \
+  \
+  vpmadd52luq P1_L, A2, R2P; \
+  vpmadd52huq P1_H, A2, R2P; \
+  /* ; Load next block of data (128 bytes) */ \
+  vmovdqu64 ZTMP5, [MSG]; \
+  vmovdqu64 ZTMP2, [MSG + 64]; \
+  \
+  vpmadd52luq Q1_L, B2, R2P; \
+  vpmadd52huq Q1_H, B2, R2P; \
+  \
+  /* ; Interleave new blocks of data */ \
+  vpunpckhqdq ZTMP3, ZTMP5, ZTMP2; \
+  vpunpcklqdq ZTMP5, ZTMP5, ZTMP2; \
+  \
+  vpmadd52luq P0_L, A0, R0; \
+  vpmadd52huq P0_H, A0, R0; \
+  /* ; Highest 42-bit limbs of new blocks */ \
+  vpsrlq ZTMP6, ZTMP3, 24; \
+  vporq ZTMP6, ZTMP6, [.Lhigh_bit ADD_RIP]; /* ; Add 2^128 to all 8 final qwords of the message */ \
+  \
+  vpmadd52luq Q0_L, B0, R0; \
+  vpmadd52huq Q0_H, B0, R0; \
+  \
+  /* ; Middle 44-bit limbs of new blocks */ \
+  vpsrlq ZTMP2, ZTMP5, 44; \
+  vpsllq ZTMP4, ZTMP3, 20; \
+  \
+  vpmadd52luq P2_L, A2, R0; \
+  vpmadd52huq P2_H, A2, R0; \
+  vpternlogq ZTMP2, ZTMP4, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \
+  \
+  /* ; Lowest 44-bit limbs of new blocks */ \
+  vpandq ZTMP5, ZTMP5, [.Lmask_44 ADD_RIP]; \
+  \
+  vpmadd52luq Q2_L, B2, R0; \
+  vpmadd52huq Q2_H, B2, R0; \
+  \
+  /* ; Load next block of data (128 bytes) */ \
+  vmovdqu64 ZTMP8, [MSG + 64*2]; \
+  vmovdqu64 ZTMP9, [MSG + 64*3]; \
+  \
+  vpmadd52luq P1_L, A0, R1; \
+  vpmadd52huq P1_H, A0, R1; \
+  /* ; Interleave new blocks of data */ \
+  vpunpckhqdq ZTMP3, ZTMP8, ZTMP9; \
+  vpunpcklqdq ZTMP8, ZTMP8, ZTMP9; \
+  \
+  vpmadd52luq Q1_L, B0, R1; \
+  vpmadd52huq Q1_H, B0, R1; \
+  \
+  /* ; Highest 42-bit limbs of new blocks */ \
+  vpsrlq ZTMP7, ZTMP3, 24; \
+  vporq ZTMP7, ZTMP7, [.Lhigh_bit ADD_RIP]; /* ; Add 2^128 to all 8 final qwords of the message */ \
+  \
+  vpmadd52luq P0_L, A1, R2P; \
+  vpmadd52huq P0_H, A1, R2P; \
+  \
+  /* ; Middle 44-bit limbs of new blocks */ \
+  vpsrlq ZTMP9, ZTMP8, 44; \
+  vpsllq ZTMP4, ZTMP3, 20; \
+  \
+  vpmadd52luq Q0_L, B1, R2P; \
+  vpmadd52huq Q0_H, B1, R2P; \
+  \
+  vpternlogq ZTMP9, ZTMP4, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \
+  \
+  /* ; Lowest 44-bit limbs of new blocks */ \
+  vpandq ZTMP8, ZTMP8, [.Lmask_44 ADD_RIP]; \
+  \
+  vpmadd52luq P2_L, A0, R2; \
+  vpmadd52huq P2_H, A0, R2; \
+  /* ; Carry propagation (first pass) */ \
+  vpsrlq ZTMP1, P0_L, 44; \
+  vpsllq P0_H, P0_H, 8; \
+  vpmadd52luq Q2_L, B0, R2; \
+  vpmadd52huq Q2_H, B0, R2; \
+  \
+  vpsrlq ZTMP3, Q0_L, 44; \
+  vpsllq Q0_H, Q0_H, 8; \
+  \
+  vpmadd52luq P1_L, A1, R0; \
+  vpmadd52huq P1_H, A1, R0; \
+  /* ; Carry propagation (first pass) - continue */ \
+  vpandq A0, P0_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq P0_H, P0_H, ZTMP1; \
+  vpmadd52luq Q1_L, B1, R0; \
+  vpmadd52huq Q1_H, B1, R0; \
+  \
+  vpandq B0, Q0_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq Q0_H, Q0_H, ZTMP3; \
+  \
+  vpmadd52luq P2_L, A1, R1; \
+  vpmadd52huq P2_H, A1, R1; \
+  /* ; Carry propagation (first pass) - continue */ \
+  vpaddq P1_L, P1_L, P0_H; \
+  vpsllq P1_H, P1_H, 8; \
+  vpsrlq ZTMP1, P1_L, 44; \
+  vpmadd52luq Q2_L, B1, R1; \
+  vpmadd52huq Q2_H, B1, R1; \
+  \
+  vpandq A1, P1_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq Q1_L, Q1_L, Q0_H; \
+  vpsllq Q1_H, Q1_H, 8; \
+  vpsrlq ZTMP3, Q1_L, 44; \
+  vpandq B1, Q1_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  \
+  vpaddq P2_L, P2_L, P1_H; /* ; P2_L += P1_H + P1_L[63:44] */ \
+  vpaddq P2_L, P2_L, ZTMP1; \
+  vpandq A2, P2_L, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \
+  vpaddq A2, A2, ZTMP6; /* ; Add highest bits from new blocks to accumulator */ \
+  vpsrlq ZTMP1, P2_L, 42; \
+  vpsllq P2_H, P2_H, 10; \
+  vpaddq P2_H, P2_H, ZTMP1; \
+  \
+  vpaddq Q2_L, Q2_L, Q1_H; /* ; Q2_L += Q1_H + Q1_L[63:44] */ \
+  vpaddq Q2_L, Q2_L, ZTMP3; \
+  vpandq B2, Q2_L, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \
+  vpaddq B2, B2, ZTMP7; /* ; Add highest bits from new blocks to accumulator */ \
+  vpsrlq ZTMP3, Q2_L, 42; \
+  vpsllq Q2_H, Q2_H, 10; \
+  vpaddq Q2_H, Q2_H, ZTMP3; \
+  \
+  /* ; Carry propagation (second pass) */ \
+  /* ; Multiply by 5 the highest bits (above 130 bits) */ \
+  vpaddq A0, A0, P2_H; \
+  vpsllq P2_H, P2_H, 2; \
+  vpaddq A0, A0, P2_H; \
+  vpaddq B0, B0, Q2_H; \
+  vpsllq Q2_H, Q2_H, 2; \
+  vpaddq B0, B0, Q2_H; \
+  \
+  vpsrlq ZTMP1, A0, 44; \
+  vpandq A0, A0, [.Lmask_44 ADD_RIP]; \
+  vpaddq A0, A0, ZTMP5; /* ; Add low 44-bit limbs from new blocks to accumulator */ \
+  vpaddq A1, A1, ZTMP2; /* ; Add middle 44-bit limbs from new blocks to accumulator */ \
+  vpaddq A1, A1, ZTMP1; \
+  vpsrlq ZTMP3, B0, 44; \
+  vpandq B0, B0, [.Lmask_44 ADD_RIP]; \
+  vpaddq B0, B0, ZTMP8; /* ; Add low 44-bit limbs from new blocks to accumulator */ \
+  vpaddq B1, B1, ZTMP9; /* ; Add middle 44-bit limbs from new blocks to accumulator */ \
+  vpaddq B1, B1, ZTMP3
+
+/*
+;; =============================================================================
+;; =============================================================================
+;; Computes hash for 16 16-byte message blocks.
+;;
+;; It first multiplies all 16 blocks with powers of R (8 blocks from A0-A2
+;; and 8 blocks from B0-B2, multiplied by R0-R2 and S0-S2)
+;;
+;;
+;;      a2      a1      a0
+;; x    b2      b1      b0
+;; ---------------------------------------
+;;     a2xb0   a1xb0   a0xb0
+;; +   a1xb1   a0xb1   5xa2xb1
+;; +   a0xb2   5xa2xb2 5xa1xb2
+;; ---------------------------------------
+;;        p2      p1      p0
+;;
+;; Then, it propagates the carry (higher bits after bit 43) from lower limbs into higher limbs,
+;; multiplying by 5 in case of the carry of p2.
+;;
+;; =============================================================================
+;A0 [in/out] ZMM register containing 1st 44-bit limb of the 8 blocks
+;A1 [in/out] ZMM register containing 2nd 44-bit limb of the 8 blocks
+;A2 [in/out] ZMM register containing 3rd 44-bit limb of the 8 blocks
+;B0 [in/out] ZMM register containing 1st 44-bit limb of the 8 blocks
+;B1 [in/out] ZMM register containing 2nd 44-bit limb of the 8 blocks
+;B2 [in/out] ZMM register containing 3rd 44-bit limb of the 8 blocks
+;R0 [in] ZMM register (R0) to include the 1st limb in IDX
+;R1 [in] ZMM register (R1) to include the 2nd limb in IDX
+;R2 [in] ZMM register (R2) to include the 3rd limb in IDX
+;R1P [in] ZMM register (R1') to include the 2nd limb (multiplied by 5) in IDX
+;R2P [in] ZMM register (R2') to include the 3rd limb (multiplied by 5) in IDX
+;S0 [in] ZMM register (S0) to include the 1st limb in IDX
+;S1 [in] ZMM register (S1) to include the 2nd limb in IDX
+;S2 [in] ZMM register (S2) to include the 3rd limb in IDX
+;S1P [in] ZMM register (S1') to include the 2nd limb (multiplied by 5) in IDX
+;S2P [in] ZMM register (S2') to include the 3rd limb (multiplied by 5) in IDX
+;P0_L [clobbered] ZMM register to contain p[0] of the 8 blocks
+;P0_H [clobbered] ZMM register to contain p[0] of the 8 blocks
+;P1_L [clobbered] ZMM register to contain p[1] of the 8 blocks
+;P1_H [clobbered] ZMM register to contain p[1] of the 8 blocks
+;P2_L [clobbered] ZMM register to contain p[2] of the 8 blocks
+;P2_H [clobbered] ZMM register to contain p[2] of the 8 blocks
+;Q0_L [clobbered] ZMM register to contain p[0] of the 8 blocks
+;Q0_H [clobbered] ZMM register to contain p[0] of the 8 blocks
+;Q1_L [clobbered] ZMM register to contain p[1] of the 8 blocks
+;Q1_H [clobbered] ZMM register to contain p[1] of the 8 blocks
+;Q2_L [clobbered] ZMM register to contain p[2] of the 8 blocks
+;Q2_H [clobbered] ZMM register to contain p[2] of the 8 blocks
+;ZTMP1 [clobbered] Temporary ZMM register
+;ZTMP2 [clobbered] Temporary ZMM register
+*/
+#define POLY1305_MUL_REDUCE_VEC16(A0, A1, A2, B0, B1, B2, R0, R1, R2, R1P, R2P,\
+                                  S0, S1, S2, S1P, S2P, P0_L, P0_H, P1_L, P1_H,\
+                                  P2_L, P2_H, Q0_L, Q0_H, Q1_L, Q1_H, Q2_L,\
+                                  Q2_H, ZTMP1, ZTMP2) \
+  /* ;; Reset accumulator */ \
+  vpxorq P0_L, P0_L, P0_L; \
+  vpxorq P0_H, P0_H, P0_H; \
+  vpxorq P1_L, P1_L, P1_L; \
+  vpxorq P1_H, P1_H, P1_H; \
+  vpxorq P2_L, P2_L, P2_L; \
+  vpxorq P2_H, P2_H, P2_H; \
+  vpxorq Q0_L, Q0_L, Q0_L; \
+  vpxorq Q0_H, Q0_H, Q0_H; \
+  vpxorq Q1_L, Q1_L, Q1_L; \
+  vpxorq Q1_H, Q1_H, Q1_H; \
+  vpxorq Q2_L, Q2_L, Q2_L; \
+  vpxorq Q2_H, Q2_H, Q2_H; \
+  \
+  /* ;; This code interleaves hash computation with input loading/splatting */ \
+  \
+  /* ; Calculate products */ \
+  vpmadd52luq P0_L, A2, R1P; \
+  vpmadd52huq P0_H, A2, R1P; \
+  \
+  vpmadd52luq Q0_L, B2, S1P; \
+  vpmadd52huq Q0_H, B2, S1P; \
+  \
+  vpmadd52luq P1_L, A2, R2P; \
+  vpmadd52huq P1_H, A2, R2P; \
+  \
+  vpmadd52luq Q1_L, B2, S2P; \
+  vpmadd52huq Q1_H, B2, S2P; \
+  \
+  vpmadd52luq P0_L, A0, R0; \
+  vpmadd52huq P0_H, A0, R0; \
+  \
+  vpmadd52luq Q0_L, B0, S0; \
+  vpmadd52huq Q0_H, B0, S0; \
+  \
+  vpmadd52luq P2_L, A2, R0; \
+  vpmadd52huq P2_H, A2, R0; \
+  vpmadd52luq Q2_L, B2, S0; \
+  vpmadd52huq Q2_H, B2, S0; \
+  \
+  vpmadd52luq P1_L, A0, R1; \
+  vpmadd52huq P1_H, A0, R1; \
+  vpmadd52luq Q1_L, B0, S1; \
+  vpmadd52huq Q1_H, B0, S1; \
+  \
+  vpmadd52luq P0_L, A1, R2P; \
+  vpmadd52huq P0_H, A1, R2P; \
+  \
+  vpmadd52luq Q0_L, B1, S2P; \
+  vpmadd52huq Q0_H, B1, S2P; \
+  \
+  vpmadd52luq P2_L, A0, R2; \
+  vpmadd52huq P2_H, A0, R2; \
+  \
+  vpmadd52luq Q2_L, B0, S2; \
+  vpmadd52huq Q2_H, B0, S2; \
+  \
+  /* ; Carry propagation (first pass) */ \
+  vpsrlq ZTMP1, P0_L, 44; \
+  vpsllq P0_H, P0_H, 8; \
+  vpsrlq ZTMP2, Q0_L, 44; \
+  vpsllq Q0_H, Q0_H, 8; \
+  \
+  vpmadd52luq P1_L, A1, R0; \
+  vpmadd52huq P1_H, A1, R0; \
+  vpmadd52luq Q1_L, B1, S0; \
+  vpmadd52huq Q1_H, B1, S0; \
+  \
+  /* ; Carry propagation (first pass) - continue */ \
+  vpandq A0, P0_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq P0_H, P0_H, ZTMP1; \
+  vpandq B0, Q0_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq Q0_H, Q0_H, ZTMP2; \
+  \
+  vpmadd52luq P2_L, A1, R1; \
+  vpmadd52huq P2_H, A1, R1; \
+  vpmadd52luq Q2_L, B1, S1; \
+  vpmadd52huq Q2_H, B1, S1; \
+  \
+  /* ; Carry propagation (first pass) - continue */ \
+  vpaddq P1_L, P1_L, P0_H; \
+  vpsllq P1_H, P1_H, 8; \
+  vpsrlq ZTMP1, P1_L, 44; \
+  vpandq A1, P1_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  vpaddq Q1_L, Q1_L, Q0_H; \
+  vpsllq Q1_H, Q1_H, 8; \
+  vpsrlq ZTMP2, Q1_L, 44; \
+  vpandq B1, Q1_L, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \
+  \
+  vpaddq P2_L, P2_L, P1_H; /* ; P2_L += P1_H + P1_L[63:44] */ \
+  vpaddq P2_L, P2_L, ZTMP1; \
+  vpandq A2, P2_L, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \
+  vpsrlq ZTMP1, P2_L, 42; \
+  vpsllq P2_H, P2_H, 10; \
+  vpaddq P2_H, P2_H, ZTMP1; \
+  \
+  vpaddq Q2_L, Q2_L, Q1_H; /* ; Q2_L += Q1_H + Q1_L[63:44] */ \
+  vpaddq Q2_L, Q2_L, ZTMP2; \
+  vpandq B2, Q2_L, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \
+  vpsrlq ZTMP2, Q2_L, 42; \
+  vpsllq Q2_H, Q2_H, 10; \
+  vpaddq Q2_H, Q2_H, ZTMP2; \
+  \
+  /* ; Carry propagation (second pass) */ \
+  /* ; Multiply by 5 the highest bits (above 130 bits) */ \
+  vpaddq A0, A0, P2_H; \
+  vpsllq P2_H, P2_H, 2; \
+  vpaddq A0, A0, P2_H; \
+  vpaddq B0, B0, Q2_H; \
+  vpsllq Q2_H, Q2_H, 2; \
+  vpaddq B0, B0, Q2_H; \
+  \
+  vpsrlq ZTMP1, A0, 44; \
+  vpandq A0, A0, [.Lmask_44 ADD_RIP]; \
+  vpaddq A1, A1, ZTMP1; \
+  vpsrlq ZTMP2, B0, 44; \
+  vpandq B0, B0, [.Lmask_44 ADD_RIP]; \
+  vpaddq B1, B1, ZTMP2;
+
+/*
+;; =============================================================================
+;; =============================================================================
+;; Shuffle data blocks, so they match the right power of R.
+;; Powers of R are in this order: R^8 R^4 R^7 R^3 R^6 R^2 R^5 R +;; Data blocks are coming in this order: A0 A4 A1 A5 A2 A6 A3 A7 +;; Generally the computation is: A0*R^8 + A1*R^7 + A2*R^6 + A3*R^5 + +;; A4*R^4 + A5*R^3 + A6*R^2 + A7*R +;; When there are less data blocks, less powers of R are used, so data needs to +;; be shuffled. Example: if 4 blocks are left, only A0-A3 are available and only +;; R-R^4 are used (A0*R^4 + A1*R^3 + A2*R^2 + A3*R), so A0-A3 need to be shifted +;; ============================================================================= +;A_L [in/out] 0-43 bits of input data +;A_M [in/out] 44-87 bits of input data +;A_H [in/out] 88-129 bits of input data +;TMP [clobbered] Temporary GP register +;N_BLOCKS [in] Number of remaining input blocks +*/ +#define SHUFFLE_DATA_SMASK_1 0x39 +#define SHUFFLE_DATA_KMASK_1 0xffff +#define SHUFFLE_DATA_SMASK_2 0x4E +#define SHUFFLE_DATA_KMASK_2 0xffff +#define SHUFFLE_DATA_SMASK_3 0x93 +#define SHUFFLE_DATA_KMASK_3 0xffff +#define SHUFFLE_DATA_KMASK_4 0xffff +#define SHUFFLE_DATA_SMASK_5 0x39 +#define SHUFFLE_DATA_KMASK_5 0xfff0 +#define SHUFFLE_DATA_SMASK_6 0x4E +#define SHUFFLE_DATA_KMASK_6 0xff00 +#define SHUFFLE_DATA_SMASK_7 0x93 +#define SHUFFLE_DATA_KMASK_7 0xf000 + +#define SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, N_BLOCKS) \ + mov TMP, SHUFFLE_DATA_KMASK_##N_BLOCKS; \ + kmovq k1, TMP; \ + vpshufd A_L{k1}, A_L, 0x4E; \ + vpshufd A_M{k1}, A_M, 0x4E; \ + vpshufd A_H{k1}, A_H, 0x4E; \ + vshufi64x2 A_L, A_L, A_L, SHUFFLE_DATA_SMASK_##N_BLOCKS; \ + vshufi64x2 A_M, A_M, A_M, SHUFFLE_DATA_SMASK_##N_BLOCKS; \ + vshufi64x2 A_H, A_H, A_H, SHUFFLE_DATA_SMASK_##N_BLOCKS + +#define SHUFFLE_DATA_BLOCKS_1(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 1) + +#define SHUFFLE_DATA_BLOCKS_2(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 2) + +#define SHUFFLE_DATA_BLOCKS_3(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 3) + +#define 
SHUFFLE_DATA_BLOCKS_4(A_L, A_M, A_H, TMP) \ + mov TMP, SHUFFLE_DATA_KMASK_4; \ + kmovq k1, TMP; \ + vpshufd A_L{k1}, A_L, 0x4E; \ + vpshufd A_M{k1}, A_M, 0x4E; \ + vpshufd A_H{k1}, A_H, 0x4E; + +#define SHUFFLE_DATA_BLOCKS_5(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 5) + +#define SHUFFLE_DATA_BLOCKS_6(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 6) + +#define SHUFFLE_DATA_BLOCKS_7(A_L, A_M, A_H, TMP) \ + SHUFFLE_DATA_BLOCKS_GENERIC(A_L, A_M, A_H, TMP, 7) + +/* +;; ============================================================================= +;; ============================================================================= +;; Computes hash for message length being multiple of block size +;; ============================================================================= +;MSG [in/out] GPR pointer to input message (updated) +;LEN [in/out] GPR in: length in bytes / out: length mod 16 +;A0 [in/out] accumulator bits 63..0 +;A1 [in/out] accumulator bits 127..64 +;A2 [in/out] accumulator bits 195..128 +;R0 [in] R constant bits 63..0 +;R1 [in] R constant bits 127..64 +;T0 [clobbered] GPR register +;T1 [clobbered] GPR register +;T2 [clobbered] GPR register +;T3 [clobbered] GPR register +;GP_RAX [clobbered] RAX register +;GP_RDX [clobbered] RDX register +*/ +#define POLY1305_BLOCKS(MSG, LEN, A0, A1, A2, R0, R1, T0, T1, T2, T3, \ + GP_RAX, GP_RDX) \ + /* ; Minimum of 256 bytes to run vectorized code */ \ + cmp LEN, POLY1305_BLOCK_SIZE*16; \ + jb .L_final_loop; \ + \ + /* ; Spread accumulator into 44-bit limbs in quadwords */ \ + mov T0, A0; \ + and T0, [.Lmask_44 ADD_RIP]; /* ;; First limb (A[43:0]) */ \ + vmovq xmm5, T0; \ + \ + mov T0, A1; \ + shrd A0, T0, 44; \ + and A0, [.Lmask_44 ADD_RIP]; /* ;; Second limb (A[77:52]) */ \ + vmovq xmm6, A0; \ + \ + shrd A1, A2, 24; \ + and A1, [.Lmask_42 ADD_RIP]; /* ;; Third limb (A[129:88]) */ \ + vmovq xmm7, A1; \ + \ + /* ; Load first block of data (128 bytes) */ \ + vmovdqu64 
zmm0, [MSG]; \ + vmovdqu64 zmm1, [MSG + 64]; \ + \ + /* ; Interleave the data to form 44-bit limbs */ \ + /* ; */ \ + /* ; zmm13 to have bits 0-43 of all 8 blocks in 8 qwords */ \ + /* ; zmm14 to have bits 87-44 of all 8 blocks in 8 qwords */ \ + /* ; zmm15 to have bits 127-88 of all 8 blocks in 8 qwords */ \ + vpunpckhqdq zmm15, zmm0, zmm1; \ + vpunpcklqdq zmm13, zmm0, zmm1; \ + \ + vpsrlq zmm14, zmm13, 44; \ + vpsllq zmm18, zmm15, 20; \ + vpternlogq zmm14, zmm18, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \ + \ + vpandq zmm13, zmm13, [.Lmask_44 ADD_RIP]; \ + vpsrlq zmm15, zmm15, 24; \ + \ + /* ; Add 2^128 to all 8 final qwords of the message */ \ + vporq zmm15, zmm15, [.Lhigh_bit ADD_RIP]; \ + \ + vpaddq zmm13, zmm13, zmm5; \ + vpaddq zmm14, zmm14, zmm6; \ + vpaddq zmm15, zmm15, zmm7; \ + \ + /* ; Load next blocks of data (128 bytes) */ \ + vmovdqu64 zmm0, [MSG + 64*2]; \ + vmovdqu64 zmm1, [MSG + 64*3]; \ + \ + /* ; Interleave the data to form 44-bit limbs */ \ + /* ; */ \ + /* ; zmm13 to have bits 0-43 of all 8 blocks in 8 qwords */ \ + /* ; zmm14 to have bits 87-44 of all 8 blocks in 8 qwords */ \ + /* ; zmm15 to have bits 127-88 of all 8 blocks in 8 qwords */ \ + vpunpckhqdq zmm18, zmm0, zmm1; \ + vpunpcklqdq zmm16, zmm0, zmm1; \ + \ + vpsrlq zmm17, zmm16, 44; \ + vpsllq zmm19, zmm18, 20; \ + vpternlogq zmm17, zmm19, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \ + \ + vpandq zmm16, zmm16, [.Lmask_44 ADD_RIP]; \ + vpsrlq zmm18, zmm18, 24; \ + \ + /* ; Add 2^128 to all 8 final qwords of the message */ \ + vporq zmm18, zmm18, [.Lhigh_bit ADD_RIP]; \ + \ + /* ; Use memory in stack to save powers of R, before loading them into ZMM registers */ \ + /* ; The first 16*8 bytes will contain the 16 bytes of the 8 powers of R */ \ + /* ; The last 64 bytes will contain the last 2 bits of powers of R, spread in 8 qwords, */ \ + /* ; to be OR'd with the highest qwords (in zmm26) */ \ + vmovq xmm3, R0; \ + vpinsrq xmm3, xmm3, R1, 1; \ + vinserti32x4 zmm1, zmm1, 
xmm3, 3; \ + \ + vpxorq zmm0, zmm0, zmm0; \ + vpxorq zmm2, zmm2, zmm2; \ + \ + /* ; Calculate R^2 */ \ + mov T0, R1; \ + shr T0, 2; \ + add T0, R1; /* ;; T0 = R1 + (R1 >> 2) */ \ + \ + mov A0, R0; \ + mov A1, R1; \ + \ + POLY1305_MUL_REDUCE(A0, A1, A2, R0, R1, T0, T1, T2, T3, GP_RAX, GP_RDX, A2_ZERO); \ + \ + vmovq xmm3, A0; \ + vpinsrq xmm3, xmm3, A1, 1; \ + vinserti32x4 zmm1, zmm1, xmm3, 2; \ + \ + vmovq xmm4, A2; \ + vinserti32x4 zmm2, zmm2, xmm4, 2; \ + \ + /* ; Calculate R^3 */ \ + POLY1305_MUL_REDUCE(A0, A1, A2, R0, R1, T0, T1, T2, T3, GP_RAX, GP_RDX, A2_NOT_ZERO); \ + \ + vmovq xmm3, A0; \ + vpinsrq xmm3, xmm3, A1, 1; \ + vinserti32x4 zmm1, zmm1, xmm3, 1; \ + \ + vmovq xmm4, A2; \ + vinserti32x4 zmm2, zmm2, xmm4, 1; \ + \ + /* ; Calculate R^4 */ \ + POLY1305_MUL_REDUCE(A0, A1, A2, R0, R1, T0, T1, T2, T3, GP_RAX, GP_RDX, A2_NOT_ZERO); \ + \ + vmovq xmm3, A0; \ + vpinsrq xmm3, xmm3, A1, 1; \ + vinserti32x4 zmm1, zmm1, xmm3, 0; \ + \ + vmovq xmm4, A2; \ + vinserti32x4 zmm2, zmm2, xmm4, 0; \ + \ + /* ; Move 2 MSbits to top 24 bits, to be OR'ed later */ \ + vpsllq zmm2, zmm2, 40; \ + \ + vpunpckhqdq zmm21, zmm1, zmm0; \ + vpunpcklqdq zmm19, zmm1, zmm0; \ + \ + vpsrlq zmm20, zmm19, 44; \ + vpsllq zmm4, zmm21, 20; \ + vpternlogq zmm20, zmm4, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \ + \ + vpandq zmm19, zmm19, [.Lmask_44 ADD_RIP]; \ + vpsrlq zmm21, zmm21, 24; \ + \ + /* ; zmm2 contains the 2 highest bits of the powers of R */ \ + vporq zmm21, zmm21, zmm2; \ + \ + /* ; Broadcast 44-bit limbs of R^4 */ \ + mov T0, A0; \ + and T0, [.Lmask_44 ADD_RIP]; /* ;; First limb (R^4[43:0]) */ \ + vpbroadcastq zmm22, T0; \ + \ + mov T0, A1; \ + shrd A0, T0, 44; \ + and A0, [.Lmask_44 ADD_RIP]; /* ;; Second limb (R^4[87:44]) */ \ + vpbroadcastq zmm23, A0; \ + \ + shrd A1, A2, 24; \ + and A1, [.Lmask_42 ADD_RIP]; /* ;; Third limb (R^4[129:88]) */ \ + vpbroadcastq zmm24, A1; \ + \ + /* ; Generate 4*5*R^4 */ \ + vpsllq zmm25, zmm23, 2; \ + vpsllq zmm26, zmm24, 2; \ + \ + /* 
; 5*R^4 */ \ + vpaddq zmm25, zmm25, zmm23; \ + vpaddq zmm26, zmm26, zmm24; \ + \ + /* ; 4*5*R^4 */ \ + vpsllq zmm25, zmm25, 2; \ + vpsllq zmm26, zmm26, 2; \ + \ + vpslldq zmm29, zmm19, 8; \ + vpslldq zmm30, zmm20, 8; \ + vpslldq zmm31, zmm21, 8; \ + \ + /* ; Calculate R^8-R^5 */ \ + POLY1305_MUL_REDUCE_VEC(zmm19, zmm20, zmm21, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ; Interleave powers of R: R^8 R^4 R^7 R^3 R^6 R^2 R^5 R */ \ + vporq zmm19, zmm19, zmm29; \ + vporq zmm20, zmm20, zmm30; \ + vporq zmm21, zmm21, zmm31; \ + \ + /* ; Broadcast R^8 */ \ + vpbroadcastq zmm22, xmm19; \ + vpbroadcastq zmm23, xmm20; \ + vpbroadcastq zmm24, xmm21; \ + \ + /* ; Generate 4*5*R^8 */ \ + vpsllq zmm25, zmm23, 2; \ + vpsllq zmm26, zmm24, 2; \ + \ + /* ; 5*R^8 */ \ + vpaddq zmm25, zmm25, zmm23; \ + vpaddq zmm26, zmm26, zmm24; \ + \ + /* ; 4*5*R^8 */ \ + vpsllq zmm25, zmm25, 2; \ + vpsllq zmm26, zmm26, 2; \ + \ + cmp LEN, POLY1305_BLOCK_SIZE*32; \ + jb .L_len_256_511; \ + \ + /* ; Store R^8-R for later use */ \ + vmovdqa64 [rsp + STACK_r_save], zmm19; \ + vmovdqa64 [rsp + STACK_r_save + 64], zmm20; \ + vmovdqa64 [rsp + STACK_r_save + 64*2], zmm21; \ + \ + /* ; Calculate R^16-R^9 */ \ + POLY1305_MUL_REDUCE_VEC(zmm19, zmm20, zmm21, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ; Store R^16-R^9 for later use */ \ + vmovdqa64 [rsp + STACK_r_save + 64*3], zmm19; \ + vmovdqa64 [rsp + STACK_r_save + 64*4], zmm20; \ + vmovdqa64 [rsp + STACK_r_save + 64*5], zmm21; \ + \ + /* ; Broadcast R^16 */ \ + vpbroadcastq zmm22, xmm19; \ + vpbroadcastq zmm23, xmm20; \ + vpbroadcastq zmm24, xmm21; \ + \ + /* ; Generate 4*5*R^16 */ \ + vpsllq zmm25, zmm23, 2; \ + vpsllq zmm26, zmm24, 2; \ + \ + /* ; 5*R^16 */ \ + vpaddq zmm25, zmm25, zmm23; \ + vpaddq zmm26, zmm26, zmm24; \ + \ + /* ; 4*5*R^16 */ \ + vpsllq zmm25, zmm25, 2; \ + vpsllq zmm26, zmm26, 2; \ + \ + mov T0, LEN; \ + and 
T0, 0xffffffffffffff00; /* ; multiple of 256 bytes */ \ + \ +.L_poly1305_blocks_loop: \ + cmp T0, POLY1305_BLOCK_SIZE*16; \ + jbe .L_poly1305_blocks_loop_end; \ + \ + /* ; zmm13-zmm18 contain the 16 blocks of message plus the previous accumulator */ \ + /* ; zmm22-24 contain the 5x44-bit limbs of the powers of R */ \ + /* ; zmm25-26 contain the 5x44-bit limbs of the powers of R' (5*4*R) */ \ + POLY1305_MSG_MUL_REDUCE_VEC16(zmm13, zmm14, zmm15, zmm16, zmm17, zmm18, \ + zmm22, zmm23, zmm24, zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm19, zmm20, zmm21, zmm27, zmm28, zmm29, \ + zmm30, zmm31, zmm11, zmm0, zmm1, \ + zmm2, zmm3, zmm4, zmm12, MSG, T0); \ + \ + jmp .L_poly1305_blocks_loop; \ + \ +.L_poly1305_blocks_loop_end: \ + \ + /* ;; Need to multiply by r^16, r^15, r^14... r */ \ + \ + /* ; First multiply by r^16-r^9 */ \ + \ + /* ; Read R^16-R^9 */ \ + vmovdqa64 zmm19, [rsp + STACK_r_save + 64*3]; \ + vmovdqa64 zmm20, [rsp + STACK_r_save + 64*4]; \ + vmovdqa64 zmm21, [rsp + STACK_r_save + 64*5]; \ + /* ; Read R^8-R */ \ + vmovdqa64 zmm22, [rsp + STACK_r_save]; \ + vmovdqa64 zmm23, [rsp + STACK_r_save + 64]; \ + vmovdqa64 zmm24, [rsp + STACK_r_save + 64*2]; \ + \ + /* ; zmm27 to have bits 87-44 of all 9-16th powers of R' in 8 qwords */ \ + /* ; zmm28 to have bits 129-88 of all 9-16th powers of R' in 8 qwords */ \ + vpsllq zmm0, zmm20, 2; \ + vpaddq zmm27, zmm20, zmm0; /* ; R1' (R1*5) */ \ + vpsllq zmm1, zmm21, 2; \ + vpaddq zmm28, zmm21, zmm1; /* ; R2' (R2*5) */ \ + \ + /* ; 4*5*R */ \ + vpsllq zmm27, zmm27, 2; \ + vpsllq zmm28, zmm28, 2; \ + \ + /* ; Then multiply by r^8-r */ \ + \ + /* ; zmm25 to have bits 87-44 of all 1-8th powers of R' in 8 qwords */ \ + /* ; zmm26 to have bits 129-88 of all 1-8th powers of R' in 8 qwords */ \ + vpsllq zmm2, zmm23, 2; \ + vpaddq zmm25, zmm23, zmm2; /* ; R1' (R1*5) */ \ + vpsllq zmm3, zmm24, 2; \ + vpaddq zmm26, zmm24, zmm3; /* ; R2' (R2*5) */ \ + \ + /* ; 4*5*R */ \ + vpsllq zmm25, zmm25, 2; \ + vpsllq zmm26, 
zmm26, 2; \ + \ + POLY1305_MUL_REDUCE_VEC16(zmm13, zmm14, zmm15, zmm16, zmm17, zmm18, \ + zmm19, zmm20, zmm21, zmm27, zmm28, \ + zmm22, zmm23, zmm24, zmm25, zmm26, \ + zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, \ + zmm7, zmm8, zmm9, zmm10, zmm11, zmm12, zmm29); \ + \ + /* ;; Add all blocks (horizontally) */ \ + vpaddq zmm13, zmm13, zmm16; \ + vpaddq zmm14, zmm14, zmm17; \ + vpaddq zmm15, zmm15, zmm18; \ + \ + vextracti64x4 ymm0, zmm13, 1; \ + vextracti64x4 ymm1, zmm14, 1; \ + vextracti64x4 ymm2, zmm15, 1; \ + \ + vpaddq ymm13, ymm13, ymm0; \ + vpaddq ymm14, ymm14, ymm1; \ + vpaddq ymm15, ymm15, ymm2; \ + \ + vextracti32x4 xmm10, ymm13, 1; \ + vextracti32x4 xmm11, ymm14, 1; \ + vextracti32x4 xmm12, ymm15, 1; \ + \ + vpaddq xmm13, xmm13, xmm10; \ + vpaddq xmm14, xmm14, xmm11; \ + vpaddq xmm15, xmm15, xmm12; \ + \ + vpsrldq xmm10, xmm13, 8; \ + vpsrldq xmm11, xmm14, 8; \ + vpsrldq xmm12, xmm15, 8; \ + \ + /* ; Finish folding and clear second qword */ \ + mov T0, 0xfd; \ + kmovq k1, T0; \ + vpaddq xmm13{k1}{z}, xmm13, xmm10; \ + vpaddq xmm14{k1}{z}, xmm14, xmm11; \ + vpaddq xmm15{k1}{z}, xmm15, xmm12; \ + \ + add MSG, POLY1305_BLOCK_SIZE*16; \ + \ + and LEN, (POLY1305_BLOCK_SIZE*16 - 1); /* ; Get remaining lengths (LEN < 256 bytes) */ \ + \ +.L_less_than_256: \ + \ + cmp LEN, POLY1305_BLOCK_SIZE*8; \ + jb .L_less_than_128; \ + \ + /* ; Read next 128 bytes */ \ + /* ; Load first block of data (128 bytes) */ \ + vmovdqu64 zmm0, [MSG]; \ + vmovdqu64 zmm1, [MSG + 64]; \ + \ + /* ; Interleave the data to form 44-bit limbs */ \ + /* ; */ \ + /* ; zmm13 to have bits 0-43 of all 8 blocks in 8 qwords */ \ + /* ; zmm14 to have bits 87-44 of all 8 blocks in 8 qwords */ \ + /* ; zmm15 to have bits 127-88 of all 8 blocks in 8 qwords */ \ + vpunpckhqdq zmm5, zmm0, zmm1; \ + vpunpcklqdq zmm3, zmm0, zmm1; \ + \ + vpsrlq zmm4, zmm3, 44; \ + vpsllq zmm8, zmm5, 20; \ + vpternlogq zmm4, zmm8, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \ + \ + vpandq zmm3, zmm3, [.Lmask_44 ADD_RIP]; 
\ + vpsrlq zmm5, zmm5, 24; \ + \ + /* ; Add 2^128 to all 8 final qwords of the message */ \ + vporq zmm5, zmm5, [.Lhigh_bit ADD_RIP]; \ + \ + vpaddq zmm13, zmm13, zmm3; \ + vpaddq zmm14, zmm14, zmm4; \ + vpaddq zmm15, zmm15, zmm5; \ + \ + add MSG, POLY1305_BLOCK_SIZE*8; \ + sub LEN, POLY1305_BLOCK_SIZE*8; \ + \ + POLY1305_MUL_REDUCE_VEC(zmm13, zmm14, zmm15, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ;; Add all blocks (horizontally) */ \ + vextracti64x4 ymm0, zmm13, 1; \ + vextracti64x4 ymm1, zmm14, 1; \ + vextracti64x4 ymm2, zmm15, 1; \ + \ + vpaddq ymm13, ymm13, ymm0; \ + vpaddq ymm14, ymm14, ymm1; \ + vpaddq ymm15, ymm15, ymm2; \ + \ + vextracti32x4 xmm10, ymm13, 1; \ + vextracti32x4 xmm11, ymm14, 1; \ + vextracti32x4 xmm12, ymm15, 1; \ + \ + vpaddq xmm13, xmm13, xmm10; \ + vpaddq xmm14, xmm14, xmm11; \ + vpaddq xmm15, xmm15, xmm12; \ + \ + vpsrldq xmm10, xmm13, 8; \ + vpsrldq xmm11, xmm14, 8; \ + vpsrldq xmm12, xmm15, 8; \ + \ + /* ; Finish folding and clear second qword */ \ + mov T0, 0xfd; \ + kmovq k1, T0; \ + vpaddq xmm13{k1}{z}, xmm13, xmm10; \ + vpaddq xmm14{k1}{z}, xmm14, xmm11; \ + vpaddq xmm15{k1}{z}, xmm15, xmm12; \ + \ +.L_less_than_128: \ + cmp LEN, 32; /* ; If remaining bytes is <= 32, perform last blocks in scalar */ \ + jbe .L_simd_to_gp; \ + \ + mov T0, LEN; \ + and T0, 0x3f; \ + lea T1, [.Lbyte64_len_to_mask_table ADD_RIP]; \ + mov T1, [T1 + 8*T0]; \ + \ + /* ; Load default byte masks */ \ + mov T2, 0xffffffffffffffff; \ + xor T3, T3; \ + \ + cmp LEN, 64; \ + cmovb T2, T1; /* ; Load mask for first 64 bytes */ \ + cmovg T3, T1; /* ; Load mask for second 64 bytes */ \ + \ + kmovq k1, T2; \ + kmovq k2, T3; \ + vmovdqu8 zmm0{k1}{z}, [MSG]; \ + vmovdqu8 zmm1{k2}{z}, [MSG + 64]; \ + \ + /* ; Pad last block message, if partial */ \ + mov T0, LEN; \ + and T0, 0x70; /* ; Multiple of 16 bytes */ \ + /* ; Load last block of data (up to 112 bytes) */ \ + shr T0, 3; /* ; Get number of full 
qwords */ \ + \ + /* ; Interleave the data to form 44-bit limbs */ \ + /* ; */ \ + /* ; zmm13 to have bits 0-43 of all 8 blocks in 8 qwords */ \ + /* ; zmm14 to have bits 87-44 of all 8 blocks in 8 qwords */ \ + /* ; zmm15 to have bits 127-88 of all 8 blocks in 8 qwords */ \ + vpunpckhqdq zmm4, zmm0, zmm1; \ + vpunpcklqdq zmm2, zmm0, zmm1; \ + \ + vpsrlq zmm3, zmm2, 44; \ + vpsllq zmm28, zmm4, 20; \ + vpternlogq zmm3, zmm28, [.Lmask_44 ADD_RIP], 0xA8; /* ; (A OR B AND C) */ \ + \ + vpandq zmm2, zmm2, [.Lmask_44 ADD_RIP]; \ + vpsrlq zmm4, zmm4, 24; \ + \ + lea T1, [.Lqword_high_bit_mask ADD_RIP]; \ + kmovb k1, [T1 + T0]; \ + /* ; Add 2^128 to final qwords of the message (all full blocks and partial block, */ \ + /* ; if "pad_to_16" is selected) */ \ + vporq zmm4{k1}, zmm4, [.Lhigh_bit ADD_RIP]; \ + \ + vpaddq zmm13, zmm13, zmm2; \ + vpaddq zmm14, zmm14, zmm3; \ + vpaddq zmm15, zmm15, zmm4; \ + \ + mov T0, LEN; \ + add T0, 15; \ + shr T0, 4; /* ; Get number of 16-byte blocks (including partial blocks) */ \ + xor LEN, LEN; /* ; All length will be consumed */ \ + \ + /* ; No need to shuffle data blocks (data is in the right order) */ \ + cmp T0, 8; \ + je .L_end_shuffle; \ + \ + cmp T0, 4; \ + je .L_shuffle_blocks_4; \ + jb .L_shuffle_blocks_3; \ + \ + /* ; Number of 16-byte blocks > 4 */ \ + cmp T0, 6; \ + je .L_shuffle_blocks_6; \ + ja .L_shuffle_blocks_7; \ + jmp .L_shuffle_blocks_5; \ + \ +.L_shuffle_blocks_3: \ + SHUFFLE_DATA_BLOCKS_3(zmm13, zmm14, zmm15, T1); \ + jmp .L_end_shuffle; \ +.L_shuffle_blocks_4: \ + SHUFFLE_DATA_BLOCKS_4(zmm13, zmm14, zmm15, T1); \ + jmp .L_end_shuffle; \ +.L_shuffle_blocks_5: \ + SHUFFLE_DATA_BLOCKS_5(zmm13, zmm14, zmm15, T1); \ + jmp .L_end_shuffle; \ +.L_shuffle_blocks_6: \ + SHUFFLE_DATA_BLOCKS_6(zmm13, zmm14, zmm15, T1); \ + jmp .L_end_shuffle; \ +.L_shuffle_blocks_7: \ + SHUFFLE_DATA_BLOCKS_7(zmm13, zmm14, zmm15, T1); \ + \ +.L_end_shuffle: \ + \ + /* ; zmm13-zmm15 contain the 8 blocks of message plus the previous accumulator */ 
\ + /* ; zmm22-24 contain the 3x44-bit limbs of the powers of R */ \ + /* ; zmm25-26 contain the 3x44-bit limbs of the powers of R' (5*4*R) */ \ + POLY1305_MUL_REDUCE_VEC(zmm13, zmm14, zmm15, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ;; Add all blocks (horizontally) */ \ + vextracti64x4 ymm0, zmm13, 1; \ + vextracti64x4 ymm1, zmm14, 1; \ + vextracti64x4 ymm2, zmm15, 1; \ + \ + vpaddq ymm13, ymm13, ymm0; \ + vpaddq ymm14, ymm14, ymm1; \ + vpaddq ymm15, ymm15, ymm2; \ + \ + vextracti32x4 xmm10, ymm13, 1; \ + vextracti32x4 xmm11, ymm14, 1; \ + vextracti32x4 xmm12, ymm15, 1; \ + \ + vpaddq xmm13, xmm13, xmm10; \ + vpaddq xmm14, xmm14, xmm11; \ + vpaddq xmm15, xmm15, xmm12; \ + \ + vpsrldq xmm10, xmm13, 8; \ + vpsrldq xmm11, xmm14, 8; \ + vpsrldq xmm12, xmm15, 8; \ + \ + vpaddq xmm13, xmm13, xmm10; \ + vpaddq xmm14, xmm14, xmm11; \ + vpaddq xmm15, xmm15, xmm12; \ + \ +.L_simd_to_gp: \ + /* ; Carry propagation */ \ + vpsrlq xmm0, xmm13, 44; \ + vpandq xmm13, xmm13, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \ + vpaddq xmm14, xmm14, xmm0; \ + vpsrlq xmm0, xmm14, 44; \ + vpandq xmm14, xmm14, [.Lmask_44 ADD_RIP]; /* ; Clear top 20 bits */ \ + vpaddq xmm15, xmm15, xmm0; \ + vpsrlq xmm0, xmm15, 42; \ + vpandq xmm15, xmm15, [.Lmask_42 ADD_RIP]; /* ; Clear top 22 bits */ \ + vpsllq xmm1, xmm0, 2; \ + vpaddq xmm0, xmm0, xmm1; \ + vpaddq xmm13, xmm13, xmm0; \ + \ + /* ; Put together A */ \ + vmovq A0, xmm13; \ + \ + vmovq T0, xmm14; \ + mov T1, T0; \ + shl T1, 44; \ + or A0, T1; \ + \ + shr T0, 20; \ + vmovq A2, xmm15; \ + mov A1, A2; \ + shl A1, 24; \ + or A1, T0; \ + shr A2, 40; \ + \ + /* ; Clear powers of R */ \ + vpxorq zmm0, zmm0, zmm0; \ + vmovdqa64 [rsp + STACK_r_save], zmm0; \ + vmovdqa64 [rsp + STACK_r_save + 64], zmm0; \ + vmovdqa64 [rsp + STACK_r_save + 64*2], zmm0; \ + vmovdqa64 [rsp + STACK_r_save + 64*3], zmm0; \ + vmovdqa64 [rsp + STACK_r_save + 64*4], zmm0; \ + vmovdqa64 [rsp + STACK_r_save + 
64*5], zmm0; \ + \ + vzeroall; \ + clear_zmm(xmm16); clear_zmm(xmm20); clear_zmm(xmm24); clear_zmm(xmm28); \ + clear_zmm(xmm17); clear_zmm(xmm21); clear_zmm(xmm25); clear_zmm(xmm29); \ + clear_zmm(xmm18); clear_zmm(xmm22); clear_zmm(xmm26); clear_zmm(xmm30); \ + clear_zmm(xmm19); clear_zmm(xmm23); clear_zmm(xmm27); clear_zmm(xmm31); \ + \ +.L_final_loop: \ + cmp LEN, POLY1305_BLOCK_SIZE; \ + jb .L_poly1305_blocks_exit; \ + \ + /* ;; A += MSG[i] */ \ + add A0, [MSG + 0]; \ + adc A1, [MSG + 8]; \ + adc A2, 1; /* ;; no padding bit */ \ + \ + mov T0, R1; \ + shr T0, 2; \ + add T0, R1; /* ;; T0 = R1 + (R1 >> 2) */ \ + \ + POLY1305_MUL_REDUCE(A0, A1, A2, R0, R1, \ + T0, T1, T2, T3, GP_RAX, GP_RDX, A2_NOT_ZERO); \ + \ + add MSG, POLY1305_BLOCK_SIZE; \ + sub LEN, POLY1305_BLOCK_SIZE; \ + \ + jmp .L_final_loop; \ + \ +.L_len_256_511: \ + \ + /* ; zmm13-zmm15 contain the 8 blocks of message plus the previous accumulator */ \ + /* ; zmm22-24 contain the 3x44-bit limbs of the powers of R */ \ + /* ; zmm25-26 contain the 3x44-bit limbs of the powers of R' (5*4*R) */ \ + POLY1305_MUL_REDUCE_VEC(zmm13, zmm14, zmm15, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ; Then multiply by r^8-r */ \ + \ + /* ; zmm19-zmm21 contains R^8-R, need to move it to zmm22-24, */ \ + /* ; as it might be used in other part of the code */ \ + vmovdqa64 zmm22, zmm19; \ + vmovdqa64 zmm23, zmm20; \ + vmovdqa64 zmm24, zmm21; \ + \ + /* ; zmm25 to have bits 87-44 of all 8 powers of R' in 8 qwords */ \ + /* ; zmm26 to have bits 129-88 of all 8 powers of R' in 8 qwords */ \ + vpsllq zmm0, zmm23, 2; \ + vpaddq zmm25, zmm23, zmm0; /* ; R1' (R1*5) */ \ + vpsllq zmm1, zmm24, 2; \ + vpaddq zmm26, zmm24, zmm1; /* ; R2' (R2*5) */ \ + \ + /* ; 4*5*R^8 */ \ + vpsllq zmm25, zmm25, 2; \ + vpsllq zmm26, zmm26, 2; \ + \ + vpaddq zmm13, zmm13, zmm16; \ + vpaddq zmm14, zmm14, zmm17; \ + vpaddq zmm15, zmm15, zmm18; \ + \ + /* ; zmm13-zmm15 contain the 8 blocks of 
message plus the previous accumulator */ \ + /* ; zmm22-24 contain the 3x44-bit limbs of the powers of R */ \ + /* ; zmm25-26 contain the 3x44-bit limbs of the powers of R' (5*4*R) */ \ + POLY1305_MUL_REDUCE_VEC(zmm13, zmm14, zmm15, \ + zmm22, zmm23, zmm24, \ + zmm25, zmm26, \ + zmm5, zmm6, zmm7, zmm8, zmm9, zmm10, \ + zmm11); \ + \ + /* ;; Add all blocks (horizontally) */ \ + vextracti64x4 ymm0, zmm13, 1; \ + vextracti64x4 ymm1, zmm14, 1; \ + vextracti64x4 ymm2, zmm15, 1; \ + \ + vpaddq ymm13, ymm13, ymm0; \ + vpaddq ymm14, ymm14, ymm1; \ + vpaddq ymm15, ymm15, ymm2; \ + \ + vextracti32x4 xmm10, ymm13, 1; \ + vextracti32x4 xmm11, ymm14, 1; \ + vextracti32x4 xmm12, ymm15, 1; \ + \ + vpaddq xmm13, xmm13, xmm10; \ + vpaddq xmm14, xmm14, xmm11; \ + vpaddq xmm15, xmm15, xmm12; \ + \ + vpsrldq xmm10, xmm13, 8; \ + vpsrldq xmm11, xmm14, 8; \ + vpsrldq xmm12, xmm15, 8; \ + \ + /* ; Finish folding and clear second qword */ \ + mov T0, 0xfd; \ + kmovq k1, T0; \ + vpaddq xmm13{k1}{z}, xmm13, xmm10; \ + vpaddq xmm14{k1}{z}, xmm14, xmm11; \ + vpaddq xmm15{k1}{z}, xmm15, xmm12; \ + \ + add MSG, POLY1305_BLOCK_SIZE*16; \ + sub LEN, POLY1305_BLOCK_SIZE*16; \ + \ + jmp .L_less_than_256; \ +.L_poly1305_blocks_exit: \ + +/* +;; ============================================================================= +;; ============================================================================= +;; Creates stack frame and saves registers +;; ============================================================================= +*/ +#define FUNC_ENTRY() \ + mov rax, rsp; \ + CFI_DEF_CFA_REGISTER(rax); \ + sub rsp, STACK_SIZE; \ + and rsp, -64; \ + \ + mov [rsp + STACK_gpr_save + 8*0], rbx; \ + mov [rsp + STACK_gpr_save + 8*1], rbp; \ + mov [rsp + STACK_gpr_save + 8*2], r12; \ + mov [rsp + STACK_gpr_save + 8*3], r13; \ + mov [rsp + STACK_gpr_save + 8*4], r14; \ + mov [rsp + STACK_gpr_save + 8*5], r15; \ + mov [rsp + STACK_rsp_save], rax; \ + CFI_CFA_ON_STACK(STACK_rsp_save, 0) + +/* +;; 
============================================================================= +;; ============================================================================= +;; Restores registers and removes the stack frame +;; ============================================================================= +*/ +#define FUNC_EXIT() \ + mov rbx, [rsp + STACK_gpr_save + 8*0]; \ + mov rbp, [rsp + STACK_gpr_save + 8*1]; \ + mov r12, [rsp + STACK_gpr_save + 8*2]; \ + mov r13, [rsp + STACK_gpr_save + 8*3]; \ + mov r14, [rsp + STACK_gpr_save + 8*4]; \ + mov r15, [rsp + STACK_gpr_save + 8*5]; \ + mov rsp, [rsp + STACK_rsp_save]; \ + CFI_DEF_CFA_REGISTER(rsp) + +/* +;; ============================================================================= +;; ============================================================================= +;; void poly1305_aead_update_fma_avx512(const void *msg, const uint64_t msg_len, +;; void *hash, const void *key) +;; arg1 - Input message +;; arg2 - Message length +;; arg3 - Input/output hash +;; arg4 - Poly1305 key +*/ +.align 32 +.globl _gcry_poly1305_amd64_avx512_blocks +ELF(.type _gcry_poly1305_amd64_avx512_blocks,@function;) +_gcry_poly1305_amd64_avx512_blocks: + CFI_STARTPROC() + vpxord xmm16, xmm16, xmm16; + vpopcntb zmm16, zmm16; /* spec stop for old AVX512 CPUs */ + FUNC_ENTRY() + +#define _a0 gp3 +#define _a0 gp3 +#define _a1 gp4 +#define _a2 gp5 +#define _r0 gp6 +#define _r1 gp7 +#define _len arg2 +#define _arg3 arg4 /* ; use rcx, arg3 = rdx */ + + /* ;; load R */ + mov _r0, [arg4 + 0 * 8] + mov _r1, [arg4 + 1 * 8] + + /* ;; load accumulator / current hash value */ + /* ;; note: arg4 can't be used beyond this point */ + mov _arg3, arg3 /* ; note: _arg3 = arg4 (linux) */ + mov _a0, [_arg3 + 0 * 8] + mov _a1, [_arg3 + 1 * 8] + mov DWORD(_a2), [_arg3 + 2 * 8] /* ; note: _a2 = arg4 (win) */ + + POLY1305_BLOCKS(arg1, _len, _a0, _a1, _a2, _r0, _r1, + gp10, gp11, gp8, gp9, rax, rdx) + + /* ;; save accumulator back */ + mov [_arg3 + 0 * 8], _a0 + mov [_arg3 + 1 
* 8], _a1 + mov [_arg3 + 2 * 8], DWORD(_a2) + + FUNC_EXIT() + xor eax, eax + kmovw k1, eax + kmovw k2, eax + ret_spec_stop + CFI_ENDPROC() +ELF(.size _gcry_poly1305_amd64_avx512_blocks, + .-_gcry_poly1305_amd64_avx512_blocks;) + +#endif +#endif diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h index 19cee5f6..9e01df46 100644 --- a/cipher/poly1305-internal.h +++ b/cipher/poly1305-internal.h @@ -1,64 +1,77 @@ /* poly1305-internal.h - Poly1305 internals * Copyright (C) 2014 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef G10_POLY1305_INTERNAL_H #define G10_POLY1305_INTERNAL_H #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #define POLY1305_TAGLEN 16 #define POLY1305_KEYLEN 32 #define POLY1305_BLOCKSIZE 16 +/* POLY1305_USE_AVX512 indicates whether to compile with Intel AVX512 code. 
*/ +#undef POLY1305_USE_AVX512 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX512) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define POLY1305_USE_AVX512 1 +#endif + + typedef struct { u32 k[4]; u32 r[4]; u32 h[5]; } POLY1305_STATE; typedef struct poly1305_context_s { POLY1305_STATE state; byte buffer[POLY1305_BLOCKSIZE]; unsigned int leftover; +#ifdef POLY1305_USE_AVX512 + unsigned int use_avx512:1; +#endif } poly1305_context_t; gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key, size_t keylen); void _gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]); void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf, size_t buflen); unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, size_t bytes); #endif /* G10_POLY1305_INTERNAL_H */ diff --git a/cipher/poly1305.c b/cipher/poly1305.c index e57e64f3..5482fc6a 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -1,763 +1,809 @@ /* poly1305.c - Poly1305 internals and generic implementation * Copyright (C) 2014,2017,2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "poly1305-internal.h" #include "mpi-internal.h" #include "longlong.h" static const char *selftest (void); #undef HAVE_ASM_POLY1305_BLOCKS #undef USE_MPI_64BIT #undef USE_MPI_32BIT #if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64) # define USE_MPI_64BIT 1 #elif BYTES_PER_MPI_LIMB == 4 # define USE_MPI_32BIT 1 #else # error please implement for this limb size. #endif /* USE_S390X_ASM indicates whether to enable zSeries code. */ #undef USE_S390X_ASM #if BYTES_PER_MPI_LIMB == 8 # if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 # if defined(HAVE_GCC_INLINE_ASM_S390X) # define USE_S390X_ASM 1 # endif /* USE_S390X_ASM */ # endif #endif +/* AMD64 Assembly implementations use SystemV ABI, ABI conversion and + * additional stack to store XMM6-XMM15 needed on Win64. */ +#undef ASM_FUNC_ABI +#undef ASM_FUNC_WRAPPER_ATTR +#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_FUNC_WRAPPER_ATTR __attribute__((noinline)) +#else +# define ASM_FUNC_ABI +# define ASM_FUNC_WRAPPER_ATTR +#endif + + #ifdef USE_S390X_ASM #define HAVE_ASM_POLY1305_BLOCKS 1 extern unsigned int _gcry_poly1305_s390x_blocks1(void *state, const byte *buf, size_t len, byte high_pad); static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad); } #endif /* USE_S390X_ASM */ +#ifdef POLY1305_USE_AVX512 + +extern unsigned int +_gcry_poly1305_amd64_avx512_blocks(const void *msg, const u64 msg_len, + void *hash, const void *key) ASM_FUNC_ABI; + +ASM_FUNC_WRAPPER_ATTR static unsigned int +poly1305_amd64_avx512_blocks(poly1305_context_t *ctx, const byte *buf, + size_t len) +{ + POLY1305_STATE *st = &ctx->state; + return _gcry_poly1305_amd64_avx512_blocks(buf, len, st->h, st->r); +} + +#endif /* 
POLY1305_USE_AVX512 */ + + static void poly1305_init (poly1305_context_t *ctx, const byte key[POLY1305_KEYLEN]) { POLY1305_STATE *st = &ctx->state; +#ifdef POLY1305_USE_AVX512 + ctx->use_avx512 = (_gcry_get_hw_features () & HWF_INTEL_AVX512) != 0; +#endif + ctx->leftover = 0; st->h[0] = 0; st->h[1] = 0; st->h[2] = 0; st->h[3] = 0; st->h[4] = 0; st->r[0] = buf_get_le32(key + 0) & 0x0fffffff; st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc; st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc; st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc; st->k[0] = buf_get_le32(key + 16); st->k[1] = buf_get_le32(key + 20); st->k[2] = buf_get_le32(key + 24); st->k[3] = buf_get_le32(key + 28); } #ifdef USE_MPI_64BIT #if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4 /* A += B (armv8/aarch64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("adds %0, %3, %0\n" \ "adcs %1, %4, %1\n" \ "adc %2, %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "r" (B0), "r" (B1), "r" (B2) \ : "cc" ) #endif /* __aarch64__ */ #if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4 /* A += B (x86-64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("addq %3, %0\n" \ "adcq %4, %1\n" \ "adcq %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "g" (B0), "g" (B1), "g" (B2) \ : "cc" ) #endif /* __x86_64__ */ #if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4 /* A += B (ppc64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("addc %0, %3, %0\n" \ "adde %1, %4, %1\n" \ "adde %2, %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "r" (B0), "r" (B1), "r" (B2) \ : "cc" ) #endif /* __powerpc__ */ #ifndef ADD_1305_64 /* A += B (generic, mpi) */ # define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \ u64 carry; \ add_ssaaaa(carry, A0, 0, A0, 0, B0); \ add_ssaaaa(A2, A1, A2, A1, B2, B1); \ add_ssaaaa(A2, A1, A2, A1, 0, carry); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \ u64 x0_lo, 
x0_hi, x1_lo, x1_hi; \ u64 t0_lo, t0_hi, t1_lo, t1_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ \ umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ \ t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ t1_hi = H2 * R0; /* h2 * r0 */ \ add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ \ /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) #ifndef HAVE_ASM_POLY1305_BLOCKS static unsigned int -poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, - byte high_pad) +poly1305_blocks_generic (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) { POLY1305_STATE *st = &ctx->state; u64 r0, r1, r1_mult5; u64 h0, h1, h2; u64 m0, m1, m2; m2 = high_pad; h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; r0 = st->r[0] + ((u64)st->r[1] << 32); r1 = st->r[2] + ((u64)st->r[3] << 32); r1_mult5 = (r1 >> 2) + r1; m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; while (len >= POLY1305_BLOCKSIZE) { /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); st->h[0] = h0; st->h[1] = h0 >> 32; st->h[2] = h1; st->h[3] = h1 >> 32; st->h[4] = h2; return 6 * sizeof (void *) + 18 * sizeof (u64); } +static unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte 
*buf, size_t len, + byte high_pad) +{ +#ifdef POLY1305_USE_AVX512 + if ((high_pad & ctx->use_avx512) != 0) + return poly1305_amd64_avx512_blocks(ctx, buf, len); +#endif + + return poly1305_blocks_generic(ctx, buf, len, high_pad); +} + #endif /* !HAVE_ASM_POLY1305_BLOCKS */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u64 u, carry; u64 k0, k1; u64 h0, h1; u64 h2; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); ctx->leftover = POLY1305_BLOCKSIZE; } burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; k0 = st->k[0] + ((u64)st->k[1] << 32); k1 = st->k[2] + ((u64)st->k[3] << 32); /* check if h is more than 2^130-5, by adding 5. */ add_ssaaaa(carry, u, 0, h0, 0, 5); add_ssaaaa(carry, u, 0, carry, 0, h1); u = (carry + h2) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... 
(+5) */ u = (-u) & 5; add_ssaaaa(h1, h0, h1, h0, 0, u); /* add high part of key + h */ add_ssaaaa(h1, h0, h1, h0, k1, k0); buf_put_le64(mac + 0, h0); buf_put_le64(mac + 8, h1); /* burn_stack */ return 4 * sizeof (void *) + 7 * sizeof (u64) + burn; } #endif /* USE_MPI_64BIT */ #ifdef USE_MPI_32BIT #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS /* HI:LO += A * B (arm) */ #define UMUL_ADD_32(HI, LO, A, B) \ __asm__ ("umlal %1, %0, %4, %5" \ : "=r" (HI), "=r" (LO) \ : "0" (HI), "1" (LO), "r" (A), "r" (B) ) /* A += B (arm) */ #ifdef __GCC_ASM_FLAG_OUTPUTS__ # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 __carry; \ __asm__ ("adds %0, %0, %5\n" \ "adcs %1, %1, %6\n" \ "adcs %2, %2, %7\n" \ "adcs %3, %3, %8\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), \ "=@cccs" (__carry) \ : "r" (B0), "r" (B1), "r" (B2), "r" (B3) \ : ); \ (A4) += (B4) + __carry; \ } while (0) #else # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 __carry = (B0); \ __asm__ ("adds %0, %0, %2\n" \ "adcs %1, %1, %3\n" \ "rrx %2, %2\n" /* carry to 31th bit */ \ : "+r" (A0), "+r" (A1), "+r" (__carry) \ : "r" (B1), "r" (0) \ : "cc" ); \ __asm__ ("lsls %0, %0, #1\n" /* carry from 31th bit */ \ "adcs %1, %1, %4\n" \ "adcs %2, %2, %5\n" \ "adc %3, %3, %6\n" \ : "+r" (__carry), "+r" (A2), "+r" (A3), "+r" (A4) \ : "r" (B2), "r" (B3), "r" (B4) \ : "cc" ); \ } while (0) #endif #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ #if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 5 /* Note: ADD_1305_32 below does not compile on GCC-4.7 */ /* A += B (i386) */ #define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ __asm__ ("addl %5, %0\n" \ "adcl %6, %1\n" \ "adcl %7, %2\n" \ "adcl %8, %3\n" \ "adcl %9, %4\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \ : "cc" ) #endif /* __i386__ */ #ifndef UMUL_ADD_32 /* HI:LO += A * B (generic, mpi) */ # define UMUL_ADD_32(HI, LO, A, B) do { \ u32 
t_lo, t_hi; \ umul_ppmm(t_hi, t_lo, A, B); \ add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \ } while (0) #endif #ifndef ADD_1305_32 /* A += B (generic, mpi) */ # define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ u32 carry0, carry1, carry2; \ add_ssaaaa(carry0, A0, 0, A0, 0, B0); \ add_ssaaaa(carry1, A1, 0, A1, 0, B1); \ add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \ add_ssaaaa(carry2, A2, 0, A2, 0, B2); \ add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \ add_ssaaaa(A4, A3, A4, A3, B4, B3); \ add_ssaaaa(A4, A3, A4, A3, 0, carry2); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \ R3_MULT5, R2_MULT5, R1_MULT5) do { \ u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \ u32 t0_lo, t0_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \ umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \ UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \ UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \ UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \ H1 = x0_hi; \ UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \ \ t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \ t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \ add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \ add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \ t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 
2^130-5 */ \ t0_hi = H4 * R0; /* h4 * r0 */ \ add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \ \ /* carry propagation */ \ H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \ H4 = H4 & 3; \ ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \ } while (0) #ifndef HAVE_ASM_POLY1305_BLOCKS static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { POLY1305_STATE *st = &ctx->state; u32 r1_mult5, r2_mult5, r3_mult5; u32 h0, h1, h2, h3, h4; u32 m0, m1, m2, m3, m4; m4 = high_pad; h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; r1_mult5 = (st->r[1] >> 2) + st->r[1]; r2_mult5 = (st->r[2] >> 2) + st->r[2]; r3_mult5 = (st->r[3] >> 2) + st->r[3]; while (len >= POLY1305_BLOCKSIZE) { m0 = buf_get_le32(buf + 0); m1 = buf_get_le32(buf + 4); m2 = buf_get_le32(buf + 8); m3 = buf_get_le32(buf + 12); /* a = h + m */ ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_32(h4, h3, h2, h1, h0, st->r[3], st->r[2], st->r[1], st->r[0], r3_mult5, r2_mult5, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } st->h[0] = h0; st->h[1] = h1; st->h[2] = h2; st->h[3] = h3; st->h[4] = h4; return 6 * sizeof (void *) + 28 * sizeof (u32); } #endif /* !HAVE_ASM_POLY1305_BLOCKS */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u32 carry, tmp0, tmp1, tmp2, u; u32 h4, h3, h2, h1, h0; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); ctx->leftover = POLY1305_BLOCKSIZE; } burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; /* check if h is more than 2^130-5, by adding 5. 
*/ add_ssaaaa(carry, tmp0, 0, h0, 0, 5); add_ssaaaa(carry, tmp0, 0, carry, 0, h1); add_ssaaaa(carry, tmp0, 0, carry, 0, h2); add_ssaaaa(carry, tmp0, 0, carry, 0, h3); u = (carry + h4) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... (+5) */ u = (-u) & 5; add_ssaaaa(carry, h0, 0, h0, 0, u); add_ssaaaa(carry, h1, 0, h1, 0, carry); add_ssaaaa(carry, h2, 0, h2, 0, carry); add_ssaaaa(carry, h3, 0, h3, 0, carry); /* add high part of key + h */ add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]); add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]); add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0); add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]); add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1); add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]); h3 += tmp2; buf_put_le32(mac + 0, h0); buf_put_le32(mac + 4, h1); buf_put_le32(mac + 8, h2); buf_put_le32(mac + 12, h3); /* burn_stack */ return 4 * sizeof (void *) + 10 * sizeof (u32) + burn; } #endif /* USE_MPI_32BIT */ unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn = 0; /* handle leftover */ if (ctx->leftover) { size_t want = (POLY1305_BLOCKSIZE - ctx->leftover); if (want > bytes) want = bytes; buf_cpy (ctx->buffer + ctx->leftover, m, want); bytes -= want; m += want; ctx->leftover += want; if (ctx->leftover < POLY1305_BLOCKSIZE) return 0; burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1); ctx->leftover = 0; } /* process full blocks */ if (bytes >= POLY1305_BLOCKSIZE) { size_t nblks = bytes / POLY1305_BLOCKSIZE; burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); m += nblks * POLY1305_BLOCKSIZE; bytes -= nblks * POLY1305_BLOCKSIZE; } /* store leftover */ if (bytes) { buf_cpy (ctx->buffer + ctx->leftover, m, bytes); ctx->leftover += bytes; } return burn; } void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn; burn = _gcry_poly1305_update_burn (ctx, m, bytes); if (burn) _gcry_burn_stack (burn); } void _gcry_poly1305_finish (poly1305_context_t *ctx, 
byte mac[POLY1305_TAGLEN]) { unsigned int burn; burn = poly1305_final (ctx, mac); _gcry_burn_stack (burn); } gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, size_t keylen) { static int initialized; static const char *selftest_failed; if (!initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); } if (keylen != POLY1305_KEYLEN) return GPG_ERR_INV_KEYLEN; if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; poly1305_init (ctx, key); return 0; } static void poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes, const byte * key) { poly1305_context_t ctx; memset (&ctx, 0, sizeof (ctx)); _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, m, bytes); _gcry_poly1305_finish (&ctx, mac); wipememory (&ctx, sizeof (ctx)); } static const char * selftest (void) { /* example from nacl */ static const byte nacl_key[POLY1305_KEYLEN] = { 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, }; static const byte nacl_msg[131] = { 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, 
0xe3, 0x55, 0xa5 }; static const byte nacl_mac[16] = { 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 }; /* generates a final value of (2^130 - 2) == 3 */ static const byte wrap_key[POLY1305_KEYLEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const byte wrap_msg[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte wrap_mac[16] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; /* mac of the macs of messages of length 0 to 256, where the key and messages * have all their values set to the length */ static const byte total_key[POLY1305_KEYLEN] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte total_mac[16] = { 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 }; poly1305_context_t ctx; poly1305_context_t total_ctx; byte all_key[POLY1305_KEYLEN]; byte all_msg[256]; byte mac[16]; size_t i, j; memset (&ctx, 0, sizeof (ctx)); memset (&total_ctx, 0, sizeof (total_ctx)); memset (mac, 0, sizeof (mac)); poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 1 failed."; /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so * make sure everything still works varying between them */ memset (mac, 0, sizeof (mac)); _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); 
_gcry_poly1305_update (&ctx, nacl_msg + 120, 4); _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); _gcry_poly1305_update (&ctx, nacl_msg + 128, 1); _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); _gcry_poly1305_finish (&ctx, mac); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 2 failed."; memset (mac, 0, sizeof (mac)); poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 3 failed."; _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); for (i = 0; i < 256; i++) { /* set key and message to 'i,i,i..' */ for (j = 0; j < sizeof (all_key); j++) all_key[j] = i; for (j = 0; j < i; j++) all_msg[j] = i; poly1305_auth (mac, all_msg, i, all_key); _gcry_poly1305_update (&total_ctx, mac, 16); } _gcry_poly1305_finish (&total_ctx, mac); if (memcmp (total_mac, mac, sizeof (total_mac)) != 0) return "Poly1305 test 4 failed."; return NULL; } diff --git a/configure.ac b/configure.ac index fc49bb86..eb149a51 100644 --- a/configure.ac +++ b/configure.ac @@ -1,3348 +1,3351 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2021 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ([2.69]) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. m4_define([mym4_package],[libgcrypt]) m4_define([mym4_major], [1]) m4_define([mym4_minor], [11]) m4_define([mym4_micro], [0]) # Below is m4 magic to extract and compute the git revision number, # the decimalized short revision number, a beta version string and a # flag indicating a development version (mym4_isbeta). Note that the # m4 processing is done by autoconf and not during the configure run. m4_define([mym4_verslist], m4_split(m4_esyscmd([./autogen.sh --find-version] \ mym4_package mym4_major mym4_minor mym4_micro),[:])) m4_define([mym4_isbeta], m4_argn(2, mym4_verslist)) m4_define([mym4_version], m4_argn(4, mym4_verslist)) m4_define([mym4_revision], m4_argn(7, mym4_verslist)) m4_define([mym4_revision_dec], m4_argn(8, mym4_verslist)) m4_esyscmd([echo ]mym4_version[>VERSION]) AC_INIT([mym4_package],[mym4_version],[https://bugs.gnupg.org]) # LT Version numbers, remember to change them just *before* a release. 
# NOTE NOTE - Already updated for a 1.11 series - NOTE NOTE # (Code changed: REVISION++) # (Interfaces added/removed/changed: CURRENT++, REVISION=0) # (Interfaces added: AGE++) # (Interfaces removed: AGE=0) # # (Interfaces removed: CURRENT++, AGE=0, REVISION=0) # (Interfaces added: CURRENT++, AGE++, REVISION=0) # (No interfaces changed: REVISION++) LIBGCRYPT_LT_CURRENT=25 LIBGCRYPT_LT_AGE=5 LIBGCRYPT_LT_REVISION=0 ################################################ AC_SUBST(LIBGCRYPT_LT_CURRENT) AC_SUBST(LIBGCRYPT_LT_AGE) AC_SUBST(LIBGCRYPT_LT_REVISION) # If the API is changed in an incompatible way: increment the next counter. # # 1.6: ABI and API change but the change is to most users irrelevant # and thus the API version number has not been incremented. LIBGCRYPT_CONFIG_API_VERSION=1 # If you change the required gpg-error version, please remove # unnecessary error code defines in src/gcrypt-int.h. NEED_GPG_ERROR_VERSION=1.27 AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* Add .note.gnu.property section for Intel CET in assembler sources when CET is enabled. */ #if defined(__ASSEMBLER__) && defined(__CET__) # include #endif /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. 
*/ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. */ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) ###################### ## Basic checks. ### (we need some results later on (e.g. $GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_SEARCH_LIBS([strerror],[cposix]) AC_PROG_INSTALL AC_PROG_AWK AC_USE_SYSTEM_EXTENSIONS # Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE dnl Under Linux, make sure that the old dtags are used if LD_LIBRARY_PATH dnl is defined. The issue is that with the new dtags, LD_LIBRARY_PATH has dnl the precedence over the run path, so that if a compatible MPFR library dnl is installed in some directory from $LD_LIBRARY_PATH, then the tested dnl MPFR library will be this library instead of the MPFR library from the dnl build tree. Other OS with the same issue might be added later. dnl dnl References: dnl https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=859732 dnl http://lists.gnu.org/archive/html/libtool/2017-05/msg00000.html dnl dnl We need to check whether --disable-new-dtags is supported as alternate dnl linkers may be used (e.g., with tcc: CC=tcc LD=tcc). 
dnl case $host in *-*-linux*) if test -n "$LD_LIBRARY_PATH"; then saved_LDFLAGS="$LDFLAGS" LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags" LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE" AC_MSG_CHECKING(whether --disable-new-dtags is supported by the linker) AC_LINK_IFELSE([AC_LANG_SOURCE([[ int main (void) { return 0; } ]])], [AC_MSG_RESULT(yes (use it since LD_LIBRARY_PATH is set))], [AC_MSG_RESULT(no) LDADD_FOR_TESTS_KLUDGE="" ]) LDFLAGS="$saved_LDFLAGS" fi ;; esac AC_SUBST([LDADD_FOR_TESTS_KLUDGE]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_major \ mym4_minor mym4_micro) AC_SUBST(VERSION_NUMBER) # We need to compile and run a program on the build machine. AX_CC_FOR_BUILD LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" available_ciphers="$available_ciphers sm4" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. available_random_modules="getentropy linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. 
case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 1, Expose all libc features (__DARWIN_C_FULL).) AC_DEFINE(USE_POSIX_SPAWN_FOR_TESTS, 1, [defined if we use posix_spawn in test program]) AC_CHECK_HEADERS(spawn.h) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. 
case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AS_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. ***]]) fi # If not specified otherwise, all available algorithms will be # included. 
default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AS_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. AC_ARG_ENABLE(pubkey-ciphers, AS_HELP_STRING([--enable-pubkey-ciphers=ciphers], [select the public-key ciphers to include]), [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_pubkey_ciphers=""]) if test "x$enabled_pubkey_ciphers" = "x" \ -o "$enabled_pubkey_ciphers" = "yes" \ -o "$enabled_pubkey_ciphers" = "no"; then enabled_pubkey_ciphers=$default_pubkey_ciphers fi AC_MSG_CHECKING([which public-key ciphers to include]) for cipher in $enabled_pubkey_ciphers; do LIST_MEMBER($cipher, $available_pubkey_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported public-key cipher specified]) fi done AC_MSG_RESULT([$enabled_pubkey_ciphers]) # Implementation of the --enable-digests switch. 
AC_ARG_ENABLE(digests, AS_HELP_STRING([--enable-digests=digests], [select the message digests to include]), [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_digests=""]) if test "x$enabled_digests" = "x" \ -o "$enabled_digests" = "yes" \ -o "$enabled_digests" = "no"; then enabled_digests=$default_digests fi AC_MSG_CHECKING([which message digests to include]) for digest in $enabled_digests; do LIST_MEMBER($digest, $available_digests) if test "$found" = "0"; then AC_MSG_ERROR([unsupported message digest specified]) fi done AC_MSG_RESULT([$enabled_digests]) # Implementation of the --enable-kdfs switch. AC_ARG_ENABLE(kdfs, AS_HELP_STRING([--enable-kdfs=kdfs], [select the KDFs to include]), [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_kdfs=""]) if test "x$enabled_kdfs" = "x" \ -o "$enabled_kdfs" = "yes" \ -o "$enabled_kdfs" = "no"; then enabled_kdfs=$default_kdfs fi AC_MSG_CHECKING([which key derivation functions to include]) for kdf in $enabled_kdfs; do LIST_MEMBER($kdf, $available_kdfs) if test "$found" = "0"; then AC_MSG_ERROR([unsupported key derivation function specified]) fi done AC_MSG_RESULT([$enabled_kdfs]) # Implementation of the --enable-random switch. AC_ARG_ENABLE(random, AS_HELP_STRING([--enable-random=name], [select which random number generator to use]), [random=`echo $enableval | tr '[A-Z]' '[a-z]'`], []) if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then random=default fi AC_MSG_CHECKING([which random module to use]) if test "$random" != "default" -a "$random" != "auto"; then LIST_MEMBER($random, $available_random_modules) if test "$found" = "0"; then AC_MSG_ERROR([unsupported random module specified]) fi fi AC_MSG_RESULT($random) # Implementation of the --disable-dev-random switch. 
AC_MSG_CHECKING([whether use of /dev/random is requested]) AC_ARG_ENABLE(dev-random, [ --disable-dev-random disable the use of dev random], try_dev_random=$enableval, try_dev_random=yes) AC_MSG_RESULT($try_dev_random) # Implementation of the --with-egd-socket switch. AC_ARG_WITH(egd-socket, [ --with-egd-socket=NAME Use NAME for the EGD socket)], egd_socket_name="$withval", egd_socket_name="" ) AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name", [Define if you don't want the default EGD socket name. For details see cipher/rndegd.c]) # Implementation of --disable-asm. AC_MSG_CHECKING([whether MPI and cipher assembler modules are requested]) AC_ARG_ENABLE([asm], AS_HELP_STRING([--disable-asm], [Disable MPI and cipher assembler modules]), [try_asm_modules=$enableval], [try_asm_modules=yes]) AC_MSG_RESULT($try_asm_modules) if test "$try_asm_modules" != yes ; then AC_DEFINE(ASM_DISABLED,1,[Defined if --disable-asm was used to configure]) fi # Implementation of the --enable-large-data-tests switch. AC_MSG_CHECKING([whether to run large data tests]) AC_ARG_ENABLE(large-data-tests, AS_HELP_STRING([--enable-large-data-tests], [Enable the real long running large data tests]), large_data_tests=$enableval,large_data_tests=no) AC_MSG_RESULT($large_data_tests) AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests) # Implementation of --enable-force-soft-hwfeatures AC_MSG_CHECKING([whether 'soft' HW feature bits are forced on]) AC_ARG_ENABLE([force-soft-hwfeatures], AS_HELP_STRING([--enable-force-soft-hwfeatures], [Enable forcing 'soft' HW feature bits on]), [force_soft_hwfeatures=$enableval], [force_soft_hwfeatures=no]) AC_MSG_RESULT($force_soft_hwfeatures) # Implementation of the --with-capabilities switch. 
# Check whether we want to use Linux capabilities AC_MSG_CHECKING([whether use of capabilities is requested]) AC_ARG_WITH(capabilities, AS_HELP_STRING([--with-capabilities], [Use linux capabilities [default=no]]), [use_capabilities="$withval"],[use_capabilities=no]) AC_MSG_RESULT($use_capabilities) # Implementation of the --enable-hmac-binary-check. AC_MSG_CHECKING([whether a HMAC binary check is requested]) AC_ARG_ENABLE(hmac-binary-check, AS_HELP_STRING([--enable-hmac-binary-check], [Enable library integrity check]), [use_hmac_binary_check="$enableval"], [use_hmac_binary_check=no]) AC_MSG_RESULT($use_hmac_binary_check) if test "$use_hmac_binary_check" = no ; then DEF_HMAC_BINARY_CHECK='' else AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1, [Define to support an HMAC based integrity check]) AC_CHECK_TOOL(OBJCOPY, [objcopy]) AC_CHECK_TOOL(READELF, [readelf]) if test "$use_hmac_binary_check" != yes ; then DEF_HMAC_BINARY_CHECK=-DKEY_FOR_BINARY_CHECK="'\"$use_hmac_binary_check\"'" fi fi AM_CONDITIONAL(USE_HMAC_BINARY_CHECK, test "x$use_hmac_binary_check" != xno) AC_SUBST(DEF_HMAC_BINARY_CHECK) # Implementation of the --with-fips-module-version. AC_ARG_WITH(fips-module-version, AS_HELP_STRING([--with-fips-module-version=VERSION], [Specify the FIPS module version for the build]), fips_module_version="$withval", fips_module_version="" ) AC_DEFINE_UNQUOTED(FIPS_MODULE_VERSION, "$fips_module_version", [Define FIPS module version for certification]) # Implementation of the --disable-jent-support switch. AC_MSG_CHECKING([whether jitter entropy support is requested]) AC_ARG_ENABLE(jent-support, AS_HELP_STRING([--disable-jent-support], [Disable support for the Jitter entropy collector]), jentsupport=$enableval,jentsupport=yes) AC_MSG_RESULT($jentsupport) # Implementation of the --disable-padlock-support switch. 
AC_MSG_CHECKING([whether padlock support is requested]) AC_ARG_ENABLE(padlock-support, AS_HELP_STRING([--disable-padlock-support], [Disable support for the PadLock Engine of VIA processors]), padlocksupport=$enableval,padlocksupport=yes) AC_MSG_RESULT($padlocksupport) # Implementation of the --disable-aesni-support switch. AC_MSG_CHECKING([whether AESNI support is requested]) AC_ARG_ENABLE(aesni-support, AS_HELP_STRING([--disable-aesni-support], [Disable support for the Intel AES-NI instructions]), aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) # Implementation of the --disable-shaext-support switch. AC_MSG_CHECKING([whether SHAEXT support is requested]) AC_ARG_ENABLE(shaext-support, AS_HELP_STRING([--disable-shaext-support], [Disable support for the Intel SHAEXT instructions]), shaextsupport=$enableval,shaextsupport=yes) AC_MSG_RESULT($shaextsupport) # Implementation of the --disable-pclmul-support switch. AC_MSG_CHECKING([whether PCLMUL support is requested]) AC_ARG_ENABLE(pclmul-support, AS_HELP_STRING([--disable-pclmul-support], [Disable support for the Intel PCLMUL instructions]), pclmulsupport=$enableval,pclmulsupport=yes) AC_MSG_RESULT($pclmulsupport) # Implementation of the --disable-sse41-support switch. AC_MSG_CHECKING([whether SSE4.1 support is requested]) AC_ARG_ENABLE(sse41-support, AS_HELP_STRING([--disable-sse41-support], [Disable support for the Intel SSE4.1 instructions]), sse41support=$enableval,sse41support=yes) AC_MSG_RESULT($sse41support) # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, AS_HELP_STRING([--disable-drng-support], [Disable support for the Intel DRNG (RDRAND instruction)]), drngsupport=$enableval,drngsupport=yes) AC_MSG_RESULT($drngsupport) # Implementation of the --disable-avx-support switch. 
AC_MSG_CHECKING([whether AVX support is requested]) AC_ARG_ENABLE(avx-support, AS_HELP_STRING([--disable-avx-support], [Disable support for the Intel AVX instructions]), avxsupport=$enableval,avxsupport=yes) AC_MSG_RESULT($avxsupport) # Implementation of the --disable-avx2-support switch. AC_MSG_CHECKING([whether AVX2 support is requested]) AC_ARG_ENABLE(avx2-support, AS_HELP_STRING([--disable-avx2-support], [Disable support for the Intel AVX2 instructions]), avx2support=$enableval,avx2support=yes) AC_MSG_RESULT($avx2support) # Implementation of the --disable-avx512-support switch. AC_MSG_CHECKING([whether AVX512 support is requested]) AC_ARG_ENABLE(avx512-support, AS_HELP_STRING([--disable-avx512-support], [Disable support for the Intel AVX512 instructions]), avx512support=$enableval,avx512support=yes) AC_MSG_RESULT($avx512support) # Implementation of the --disable-neon-support switch. AC_MSG_CHECKING([whether NEON support is requested]) AC_ARG_ENABLE(neon-support, AS_HELP_STRING([--disable-neon-support], [Disable support for the ARM NEON instructions]), neonsupport=$enableval,neonsupport=yes) AC_MSG_RESULT($neonsupport) # Implementation of the --disable-arm-crypto-support switch. AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested]) AC_ARG_ENABLE(arm-crypto-support, AS_HELP_STRING([--disable-arm-crypto-support], [Disable support for the ARMv8 Crypto Extension instructions]), armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) # Implementation of the --disable-ppc-crypto-support switch. AC_MSG_CHECKING([whether PPC crypto support is requested]) AC_ARG_ENABLE(ppc-crypto-support, AS_HELP_STRING([--disable-ppc-crypto-support], [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), ppccryptosupport=$enableval,ppccryptosupport=yes) AC_MSG_RESULT($ppccryptosupport) # Implementation of the --disable-O-flag-munging switch. 
AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], AS_HELP_STRING([--disable-O-flag-munging], [Disable modification of the cc -O flag]), [enable_o_flag_munging=$enableval], [enable_o_flag_munging=yes]) AC_MSG_RESULT($enable_o_flag_munging) AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes") # Implementation of the --disable-instrumentation-munging switch. AC_MSG_CHECKING([whether a instrumentation (-fprofile, -fsanitize) munging is requested]) AC_ARG_ENABLE([instrumentation-munging], AS_HELP_STRING([--disable-instrumentation-munging], [Disable modification of the cc instrumentation options]), [enable_instrumentation_munging=$enableval], [enable_instrumentation_munging=yes]) AC_MSG_RESULT($enable_instrumentation_munging) AM_CONDITIONAL(ENABLE_INSTRUMENTATION_MUNGING, test "$enable_instrumentation_munging" = "yes") # Implementation of the --disable-amd64-as-feature-detection switch. AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection]) AC_ARG_ENABLE(amd64-as-feature-detection, AS_HELP_STRING([--disable-amd64-as-feature-detection], [Disable the auto-detection of AMD64 as(1) features]), amd64_as_feature_detection=$enableval, amd64_as_feature_detection=yes) AC_MSG_RESULT($amd64_as_feature_detection) AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME", [A human readable text with the name of the OS]) # For some systems we know that we have ld_version scripts. # Use it then as default. have_ld_version_script=no case "${host}" in *-*-linux*) have_ld_version_script=yes ;; *-*-gnu*) have_ld_version_script=yes ;; esac AC_ARG_ENABLE([ld-version-script], AS_HELP_STRING([--enable-ld-version-script], [enable/disable use of linker version script. 
(default is system dependent)]), [have_ld_version_script=$enableval], [ : ] ) AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes") AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM", [defined to the name of the strong random device]) AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM", [defined to the name of the weaker random device]) ############################### #### Checks for libraries. #### ############################### # # gpg-error is required. # AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION") if test "x$GPG_ERROR_LIBS" = "x"; then AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .]) fi AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT, [The default error source for libgcrypt.]) AM_CONDITIONAL(USE_GPGRT_CONFIG, [test -n "$GPGRT_CONFIG" \ -a "$ac_cv_path_GPG_ERROR_CONFIG" = no]) # # Check whether pthreads is available # if test "$have_w32_system" != yes; then AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes) if test "$have_pthread" = yes; then AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.]) fi fi # Solaris needs -lsocket and -lnsl. Unisys system includes # gethostbyname in libsocket but needs libnsl for socket. AC_SEARCH_LIBS(setsockopt, [socket], , [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])]) AC_SEARCH_LIBS(setsockopt, [nsl]) ################################## #### Checks for header files. #### ################################## AC_CHECK_HEADERS(unistd.h sys/auxv.h sys/random.h) ########################################## #### Checks for typedefs, structures, #### #### and compiler characteristics. #### ########################################## AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_PID_T AC_CHECK_TYPES([byte, ushort, u16, u32, u64]) gl_TYPE_SOCKLEN_T # # Check for __builtin_bswap32 intrinsic. 
# AC_CACHE_CHECK(for __builtin_bswap32, [gcry_cv_have_builtin_bswap32], [gcry_cv_have_builtin_bswap32=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int x = 0; int y = __builtin_bswap32(x); return y;])], [gcry_cv_have_builtin_bswap32=yes])]) if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP32,1, [Defined if compiler has '__builtin_bswap32' intrinsic]) fi # # Check for __builtin_bswap64 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap64, [gcry_cv_have_builtin_bswap64], [gcry_cv_have_builtin_bswap64=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [long long x = 0; long long y = __builtin_bswap64(x); return y;])], [gcry_cv_have_builtin_bswap64=yes])]) if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP64,1, [Defined if compiler has '__builtin_bswap64' intrinsic]) fi # # Check for __builtin_ctz intrinsic. # AC_CACHE_CHECK(for __builtin_ctz, [gcry_cv_have_builtin_ctz], [gcry_cv_have_builtin_ctz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_ctz(x); return y;])], [gcry_cv_have_builtin_ctz=yes])]) if test "$gcry_cv_have_builtin_ctz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZ, 1, [Defined if compiler has '__builtin_ctz' intrinsic]) fi # # Check for __builtin_ctzl intrinsic. # AC_CACHE_CHECK(for __builtin_ctzl, [gcry_cv_have_builtin_ctzl], [gcry_cv_have_builtin_ctzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_ctzl(x); return y;])], [gcry_cv_have_builtin_ctzl=yes])]) if test "$gcry_cv_have_builtin_ctzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZL, 1, [Defined if compiler has '__builtin_ctzl' intrinsic]) fi # # Check for __builtin_clz intrinsic. 
# AC_CACHE_CHECK(for __builtin_clz, [gcry_cv_have_builtin_clz], [gcry_cv_have_builtin_clz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_clz(x); return y;])], [gcry_cv_have_builtin_clz=yes])]) if test "$gcry_cv_have_builtin_clz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZ, 1, [Defined if compiler has '__builtin_clz' intrinsic]) fi # # Check for __builtin_clzl intrinsic. # AC_CACHE_CHECK(for __builtin_clzl, [gcry_cv_have_builtin_clzl], [gcry_cv_have_builtin_clzl=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned long x = 0; long y = __builtin_clzl(x); return y;])], [gcry_cv_have_builtin_clzl=yes])]) if test "$gcry_cv_have_builtin_clzl" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CLZL, 1, [Defined if compiler has '__builtin_clzl' intrinsic]) fi # # Check for __sync_synchronize intrinsic. # AC_CACHE_CHECK(for __sync_synchronize, [gcry_cv_have_sync_synchronize], [gcry_cv_have_sync_synchronize=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [__sync_synchronize(); return 0;])], [gcry_cv_have_sync_synchronize=yes])]) if test "$gcry_cv_have_sync_synchronize" = "yes" ; then AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, [Defined if compiler has '__sync_synchronize' intrinsic]) fi # # Check for VLA support (variable length arrays). # AC_CACHE_CHECK(whether the variable length arrays are supported, [gcry_cv_have_vla], [gcry_cv_have_vla=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void f1(char *, int); char foo(int i) { char b[(i < 0 ? 0 : i) + 1]; f1(b, sizeof b); return b[0];}]])], [gcry_cv_have_vla=yes])]) if test "$gcry_cv_have_vla" = "yes" ; then AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported]) fi # # Check for ELF visibility support. 
# AC_CACHE_CHECK(whether the visibility attribute is supported, gcry_cv_visibility_attribute, [gcry_cv_visibility_attribute=no AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo __attribute__ ((visibility ("hidden"))) = 1; int bar __attribute__ ((visibility ("protected"))) = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then gcry_cv_visibility_attribute=yes fi fi fi ]) if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken visibility attribute, gcry_cv_broken_visibility_attribute, [gcry_cv_broken_visibility_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo (int x); int bar (int x) __asm__ ("foo") __attribute__ ((visibility ("hidden"))); int bar (int x) { return x; } ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1; then gcry_cv_broken_visibility_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken alias attribute, gcry_cv_broken_alias_attribute, [gcry_cv_broken_alias_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[extern int foo (int x) __asm ("xyzzy"); int bar (int x) { return x; } extern __typeof (bar) foo __attribute ((weak, alias ("bar"))); extern int dfoo; extern __typeof (dfoo) dfoo __asm ("abccb"); int dfoo = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \ grep 'abccb' conftest.s >/dev/null 2>&1; then gcry_cv_broken_alias_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(if gcc supports -fvisibility=hidden, gcry_cv_gcc_has_f_visibility, [gcry_cv_gcc_has_f_visibility=no _gcc_cflags_save=$CFLAGS CFLAGS="-fvisibility=hidden" 
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])], gcry_cv_gcc_has_f_visibility=yes) CFLAGS=$_gcc_cflags_save; ]) fi if test "$gcry_cv_visibility_attribute" = "yes" \ && test "$gcry_cv_broken_visibility_attribute" != "yes" \ && test "$gcry_cv_broken_alias_attribute" != "yes" \ && test "$gcry_cv_gcc_has_f_visibility" = "yes" then AC_DEFINE(GCRY_USE_VISIBILITY, 1, [Define to use the GNU C visibility attribute.]) CFLAGS="$CFLAGS -fvisibility=hidden" fi # Following attribute tests depend on warnings to cause compile to fail, # so set -Werror temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether the compiler supports the GCC style aligned attribute # AC_CACHE_CHECK([whether the GCC style aligned attribute is supported], [gcry_cv_gcc_attribute_aligned], [gcry_cv_gcc_attribute_aligned=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct { int a; } foo __attribute__ ((aligned (16)));]])], [gcry_cv_gcc_attribute_aligned=yes])]) if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1, [Defined if a GCC style "__attribute__ ((aligned (n))" is supported]) fi # # Check whether the compiler supports the GCC style packed attribute # AC_CACHE_CHECK([whether the GCC style packed attribute is supported], [gcry_cv_gcc_attribute_packed], [gcry_cv_gcc_attribute_packed=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct foolong_s { long b; } __attribute__ ((packed)); struct foo_s { char a; struct foolong_s b; } __attribute__ ((packed)); enum bar { FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))), };]])], [gcry_cv_gcc_attribute_packed=yes])]) if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, [Defined if a GCC style "__attribute__ ((packed))" is supported]) fi # # Check whether the compiler supports the GCC style may_alias attribute # AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported], [gcry_cv_gcc_attribute_may_alias], [gcry_cv_gcc_attribute_may_alias=no 
AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[typedef struct foo_s { int a; } __attribute__ ((may_alias)) foo_t;]])], [gcry_cv_gcc_attribute_may_alias=yes])]) if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1, [Defined if a GCC style "__attribute__ ((may_alias))" is supported]) fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # AC_CACHE_CHECK([whether 'asm' assembler keyword is supported], [gcry_cv_have_asm], [gcry_cv_have_asm=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm("":::"memory"); }]])], [gcry_cv_have_asm=yes])]) AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported], [gcry_cv_have___asm__], [gcry_cv_have___asm__=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("":::"memory"); }]])], [gcry_cv_have___asm__=yes])]) if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_DEFINE(asm,__asm__, [Define to supported assembler block keyword, if plain 'asm' was not supported]) fi fi # # Check whether the compiler supports inline assembly memory barrier. 
# if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { __asm__ volatile("":::"memory"); __asm__ volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(int x) { asm volatile("":::"memory"); asm volatile("":"+r"(x)::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1, [Define if inline asm memory barrier is supported]) fi # # Check whether GCC assembler supports features needed for our ARM # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], [gcry_cv_gcc_arm_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_arm_platform_as_ok="n/a" else gcry_cv_gcc_arm_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if assembler supports UAL syntax. */ ".syntax unified\n\t" ".arm\n\t" /* our assembly code is in ARM mode */ ".text\n\t" /* Following causes error if assembler ignored '.syntax unified'. */ "asmfunc:\n\t" "add %r0, %r0, %r4, ror #12;\n\t" /* Test if '.type' and '.size' are supported. 
*/ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,%function;\n\t" );]], [ asmfunc(); ] )], [gcry_cv_gcc_arm_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARM assembly implementations]) fi # # Check whether GCC assembler supports features needed for our ARMv8/Aarch64 # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations], [gcry_cv_gcc_aarch64_platform_as_ok], [if test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_aarch64_platform_as_ok="n/a" else gcry_cv_gcc_aarch64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "asmfunc:\n\t" "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" );]], [ asmfunc(); ] )], [gcry_cv_gcc_aarch64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations]) fi # # Check whether GCC assembler supports for CFI directives. 
# AC_CACHE_CHECK([whether GCC assembler supports for CFI directives], [gcry_cv_gcc_asm_cfi_directives], [gcry_cv_gcc_asm_cfi_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "ac_test:\n\t" ".cfi_startproc\n\t" ".cfi_remember_state\n\t" ".cfi_adjust_cfa_offset 8\n\t" ".cfi_rel_offset 0, 8\n\t" ".cfi_def_cfa_register 1\n\t" ".cfi_register 2, 3\n\t" ".cfi_restore 2\n\t" ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t" ".cfi_restore_state\n\t" ".long 0\n\t" ".cfi_endproc\n\t" );]])], [gcry_cv_gcc_asm_cfi_directives=yes])]) if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_CFI_DIRECTIVES,1, [Defined if underlying assembler supports for CFI directives]) fi # # Check whether GCC assembler supports for ELF directives. # AC_CACHE_CHECK([whether GCC assembler supports for ELF directives], [gcry_cv_gcc_asm_elf_directives], [gcry_cv_gcc_asm_elf_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if ELF directives '.type' and '.size' are supported. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,STT_FUNC;\n\t" );]])], [gcry_cv_gcc_asm_elf_directives=yes])]) if test "$gcry_cv_gcc_asm_elf_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_ELF_DIRECTIVES,1, [Defined if underlying assembler supports for ELF directives]) fi # # Check whether underscores in symbols are required. This needs to be # done before setting up the assembler stuff. # GNUPG_SYS_SYMBOL_UNDERSCORE() ################################# #### #### #### Setup assembler stuff. #### #### Define mpi_cpu_arch. #### #### #### ################################# AC_ARG_ENABLE(mpi-path, AS_HELP_STRING([--enable-mpi-path=EXTRA_PATH], [prepend EXTRA_PATH to list of CPU specific optimizations]), mpi_extra_path="$enableval",mpi_extra_path="") AC_MSG_CHECKING(architecture and mpi assembler functions) if test -f $srcdir/mpi/config.links ; then . 
$srcdir/mpi/config.links AC_CONFIG_LINKS("$mpi_ln_list") ac_cv_mpi_sflags="$mpi_sflags" AC_MSG_RESULT($mpi_cpu_arch) else AC_MSG_RESULT(failed) AC_MSG_ERROR([mpi/config.links missing!]) fi MPI_SFLAGS="$ac_cv_mpi_sflags" AC_SUBST(MPI_SFLAGS) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) # Reset non applicable feature flags. 
if test "$mpi_cpu_arch" != "x86" ; then
   # x86-only hardware features can never be used on other architectures;
   # mark them "n/a" so later reporting/summary code skips them.
   aesnisupport="n/a"
   shaextsupport="n/a"
   pclmulsupport="n/a"
   sse41support="n/a"
   avxsupport="n/a"
   avx2support="n/a"
   avx512support="n/a"
   padlocksupport="n/a"
   drngsupport="n/a"
fi
if test "$mpi_cpu_arch" != "arm" ; then
   if test "$mpi_cpu_arch" != "aarch64" ; then
     # NEON and the ARM crypto extensions apply to both 32-bit "arm" and
     # "aarch64", hence the nested test.
     neonsupport="n/a"
     armcryptosupport="n/a"
   fi
fi
if test "$mpi_cpu_arch" != "ppc"; then
   ppccryptosupport="n/a"
fi

#############################################
####                                     ####
#### Platform specific compiler checks.  ####
####                                     ####
#############################################

# Following tests depend on warnings to cause compile to fail, so set -Werror
# temporarily.
_gcc_cflags_save=$CFLAGS
CFLAGS="$CFLAGS -Werror"

#
# Check whether compiler supports 'ms_abi' function attribute.
#
# Compile-only probe: the attribute either parses cleanly or (with -Werror
# above) fails the compile.
AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
  [gcry_cv_gcc_attribute_ms_abi],
  [gcry_cv_gcc_attribute_ms_abi=no
   AC_COMPILE_IFELSE([AC_LANG_SOURCE(
     [[int __attribute__ ((ms_abi)) proto(int);]])],
     [gcry_cv_gcc_attribute_ms_abi=yes])])
if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
     [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute])
fi

#
# Check whether compiler supports 'sysv_abi' function attribute.
#
AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
  [gcry_cv_gcc_attribute_sysv_abi],
  [gcry_cv_gcc_attribute_sysv_abi=no
   AC_COMPILE_IFELSE([AC_LANG_SOURCE(
     [[int __attribute__ ((sysv_abi)) proto(int);]])],
     [gcry_cv_gcc_attribute_sysv_abi=yes])])
if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
     [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute])
fi

#
# Check whether default calling convention is 'ms_abi'.
# if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], [gcry_cv_gcc_default_abi_is_ms_abi], [gcry_cv_gcc_default_abi_is_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((ms_abi))(*msabi_func)(void); /* warning on SysV abi targets, passes on Windows based targets */ msabi_func = def_func; return msabi_func; }]])], [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, [Defined if default calling convention is 'ms_abi']) fi fi # # Check whether default calling convention is 'sysv_abi'. # if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], [gcry_cv_gcc_default_abi_is_sysv_abi], [gcry_cv_gcc_default_abi_is_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((sysv_abi))(*sysvabi_func)(void); /* warning on MS ABI targets, passes on SysV ABI targets */ sysvabi_func = def_func; return sysvabi_func; }]])], [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, [Defined if default calling convention is 'sysv_abi']) fi fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. 
# AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions], [gcry_cv_gcc_inline_asm_ssse3], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_ssse3="n/a" else gcry_cv_gcc_inline_asm_ssse3=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; void a(void) { __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_ssse3=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1, [Defined if inline assembler supports SSSE3 instructions]) fi # # Check whether GCC inline assembler supports PCLMUL instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], [gcry_cv_gcc_inline_asm_pclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_pclmul="n/a" else gcry_cv_gcc_inline_asm_pclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_pclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, [Defined if inline assembler supports PCLMUL instructions]) fi # # Check whether GCC inline assembler supports SHA Extensions instructions. 
# AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions], [gcry_cv_gcc_inline_asm_shaext], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_shaext="n/a" else gcry_cv_gcc_inline_asm_shaext=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_shaext=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1, [Defined if inline assembler supports SHA Extensions instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); 
}]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports AVX512 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX512 instructions], [gcry_cv_gcc_inline_asm_avx512], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_avx512="n/a" else gcry_cv_gcc_inline_asm_avx512=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("xgetbv; vpopcntq %%zmm7, %%zmm1%{%%k1%}%{z%};\n\t":::"cc"); __asm__("vpexpandb %%zmm3, %%zmm1;\n\t":::"cc"); __asm__("vpxorq %%xmm7, %%xmm7, %%xmm7;\n\t":::"cc"); __asm__("vpxorq %%ymm7, %%ymm7, %%ymm7;\n\t":::"cc"); __asm__("vpxorq (%%eax)%{1to8%}, %%zmm7, %%zmm7;\n\t":::"cc"); }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_avx512=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx512" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX512,1, [Defined if inline assembler supports AVX512 instructions]) fi # # Check whether GCC inline assembler supports VAES and VPCLMUL instructions # AC_CACHE_CHECK([whether GCC inline assembler supports VAES and VPCLMUL instructions], [gcry_cv_gcc_inline_asm_vaes_vpclmul], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" 
!= "yes" ; then gcry_cv_gcc_inline_asm_vaes_vpclmul="n/a" else gcry_cv_gcc_inline_asm_vaes_vpclmul=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[void a(void) { __asm__("vaesenclast %%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vaesenclast %%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ __asm__("vpclmulqdq \$0,%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/ __asm__("vpclmulqdq \$0,%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/ }]], [ a(); ] )], [gcry_cv_gcc_inline_asm_vaes_vpclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_vaes_vpclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL,1, [Defined if inline assembler supports VAES and VPCLMUL instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else gcry_cv_gcc_inline_asm_bmi2=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[unsigned int a(unsigned int x, unsigned int y) { unsigned int tmp1, tmp2; asm ("rorxl %2, %1, %0" : "=r" (tmp1) : "rm0" (x), "J" (32 - ((23) & 31))); asm ("andnl %2, %1, %0" : "=r" (tmp2) : "r0" (x), "rm" (y)); return tmp1 + tmp2; }]], [ a(1, 2); ] )], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");]], [fn();])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to 
CPPFLAGS and try check again. # _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");]], [fn();])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # # Check whether GCC assembler supports features needed for our amd64 # implementations # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], [gcry_cv_gcc_amd64_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_amd64_platform_as_ok="n/a" else gcry_cv_gcc_amd64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ ".text\n\t" "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. 
*/ "xorl \$(123456789/12345678), %ebp;\n\t" );]], [ asmfunc(); ])], [gcry_cv_gcc_amd64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" );]], [ asmfunc(); ])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".intel_syntax noprefix\n\t" ".text\n\t" "actest:\n\t" "pxor xmm1, xmm7;\n\t" "vperm2i128 ymm2, ymm3, ymm0, 1;\n\t" "add eax, ebp;\n\t" "rorx eax, ebp, 1;\n\t" "sub eax, [esp + 4];\n\t" "add dword ptr [esp + eax], 0b10101;\n\t" ".att_syntax prefix\n\t" );]], [ actest(); ])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with 
Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. */ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" ".text\n\t" "testfn:\n\t" "vld1.64 {%q0-%q1}, [%r0]!;\n\t" "vrev64.8 %q0, %q3;\n\t" "vadd.u64 %q0, %q1;\n\t" "vadd.s64 %d3, %d2, %d3;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 
Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" ".text\n\t" "testfn:\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], [gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 
Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" || test "$try_asm_modules" != "yes" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".cpu generic+simd+crypto\n\t" ".text\n\t" "testfn:\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); ]], [ testfn(); ])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension instructions]) fi # # Check whether PowerPC AltiVec/VSX intrinsics # AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics], [gcry_cv_cc_ppc_altivec], [if test "$mpi_cpu_arch" != "ppc" || test "$try_asm_modules" != "yes" ; then gcry_cv_cc_ppc_altivec="n/a" else gcry_cv_cc_ppc_altivec=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[#include typedef vector unsigned char block; typedef vector unsigned int vecu32; static inline __attribute__((always_inline)) vecu32 vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx) { return vec_sld (a, b, (4 * idx) & 15); } block fn(block in) { block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0)); vecu32 y = vec_vsx_ld (0, (unsigned int*)0); y = vec_sld_u32 (y, y, 3); return vec_cipher_be (t, in) ^ (block)y; } ]])], [gcry_cv_cc_ppc_altivec=yes]) fi]) if test 
"$gcry_cv_cc_ppc_altivec" = "yes" ; then
  AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
            [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
fi

# Retry the intrinsics probe with the PPC ISA flags forced on, in case the
# default target flags were the only reason the first probe failed.
_gcc_cflags_save=$CFLAGS
CFLAGS="$CFLAGS -O2 -maltivec -mvsx -mcrypto"

# Portability fix: use '=' for string comparison.  '==' is a bashism that
# makes the generated configure script fail under strict POSIX shells
# (e.g. dash as /bin/sh).
if test "$gcry_cv_cc_ppc_altivec" = "no" &&
   test "$mpi_cpu_arch" = "ppc" &&
   test "$try_asm_modules" = "yes" ; then
  AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags],
    [gcry_cv_cc_ppc_altivec_cflags],
    [gcry_cv_cc_ppc_altivec_cflags=no
     AC_COMPILE_IFELSE([AC_LANG_SOURCE(
       [[#include <altivec.h>
         typedef vector unsigned char block;
         typedef vector unsigned int vecu32;
         static inline __attribute__((always_inline)) vecu32
         vec_sld_u32(vecu32 a, vecu32 b, unsigned int idx)
         {
           return vec_sld (a, b, (4 * idx) & 15);
         }
         block fn(block in)
         {
           block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
           vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
           y = vec_sld_u32 (y, y, 3);
           return vec_cipher_be (t, in) ^ (block)y;
         }]])],
       [gcry_cv_cc_ppc_altivec_cflags=yes])])
  if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
              [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1,
              [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags])
  fi
fi

# Note: gcry_cv_cc_ppc_altivec_cflags may be unset here (non-ppc, or the
# first probe already succeeded); 'test ... = "yes"' is then simply false.
AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS,
               test "$gcry_cv_cc_ppc_altivec_cflags" = "yes")

# Restore flags.
CFLAGS=$_gcc_cflags_save;

#
# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions],
       [gcry_cv_gcc_inline_asm_ppc_altivec],
       [if test "$mpi_cpu_arch" != "ppc" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
        else
          gcry_cv_gcc_inline_asm_ppc_altivec=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[__asm__(".globl testfn;\n"
                    ".text\n\t"
                    "testfn:\n"
                    "stvx %v31,%r12,%r0;\n"
                    "lvx %v20,%r12,%r0;\n"
                    "vcipher %v0, %v1, %v22;\n"
                    "lxvw4x %vs32, %r0, %r1;\n"
                    "vadduwm %v0, %v1, %v22;\n"
                    "vshasigmaw %v0, %v1, 0, 15;\n"
                    "vshasigmad %v0, %v1, 0, 15;\n"
                    "vpmsumd %v11, %v11, %v11;\n"
                  );
            ]], [ testfn(); ] )],
          [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1,
     [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions])
fi

#
# Check whether GCC inline assembler supports PowerISA 3.00 instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions],
       [gcry_cv_gcc_inline_asm_ppc_arch_3_00],
       [if test "$mpi_cpu_arch" != "ppc" ||
           test "$try_asm_modules" != "yes" ; then
          gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
        else
          gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
          AC_LINK_IFELSE([AC_LANG_PROGRAM(
          [[__asm__(".text\n\t"
                    ".globl testfn;\n"
                    "testfn:\n"
                    "stxvb16x %r1,%v12,%v30;\n"
                  );
            ]], [ testfn(); ])],
          [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1,
     [Defined if inline assembler supports PowerISA 3.00 instructions])
fi

#
# Check whether GCC inline assembler supports zSeries instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports zSeries instructions],
      [gcry_cv_gcc_inline_asm_s390x],
      [if test "$mpi_cpu_arch" != "s390x" ||
          test "$try_asm_modules" != "yes" ; then
         gcry_cv_gcc_inline_asm_s390x="n/a"
       else
         gcry_cv_gcc_inline_asm_s390x=no
         AC_LINK_IFELSE([AC_LANG_PROGRAM(
         [[typedef unsigned int u128_t __attribute__ ((mode (TI)));
           unsigned int testfunc(unsigned int x, void *y, unsigned int z)
           {
             unsigned long fac[8];
             register unsigned long reg0 asm("0") = 0;
             register unsigned long reg1 asm("1") = x;
             u128_t r1 = ((u128_t)(unsigned long)y << 64) | (unsigned long)z;
             u128_t r2 = 0;
             u128_t r3 = 0;
             asm volatile (".insn rre,0xb92e << 16, %[r1], %[r2]\n\t"
                           : [r1] "+a" (r1), [r2] "+a" (r2)
                           : "r" (reg0), "r" (reg1)
                           : "cc", "memory");
             asm volatile (".insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
                           : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3)
                           : "r" (reg0), "r" (reg1)
                           : "cc", "memory");
             reg0 = 8 - 1;
             asm ("stfle %1\n\t"
                  : "+d" (reg0), "=Q" (fac[0])
                  :
                  : "cc", "memory");
             asm volatile ("mvc 0(16, %0), 0(%1)\n\t"
                           :
                           : "a" (y), "a" (fac)
                           : "memory");
             asm volatile ("xc 0(16, %0), 0(%0)\n\t"
                           :
                           : "a" (fac)
                           : "memory");
             asm volatile ("risbgn %%r11, %%r11, 0, 129, 0\n\t"
                           :
                           :
                           : "memory", "r11");
             asm volatile ("algrk %%r14, %%r14, %%r14\n\t"
                           :
                           :
                           : "memory", "r14");
             return (unsigned int)r1 ^ reg0;
           }
           ]] , [ testfunc(0, 0, 0); ])],
         [gcry_cv_gcc_inline_asm_s390x=yes])
       fi])
if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X,1,
     [Defined if inline assembler supports zSeries instructions])
fi

#
# Check whether GCC inline assembler supports zSeries vector instructions
#
# Note: only attempted when the plain zSeries check above succeeded.
AC_CACHE_CHECK([whether GCC inline assembler supports zSeries vector instructions],
      [gcry_cv_gcc_inline_asm_s390x_vx],
      [if test "$mpi_cpu_arch" != "s390x" ||
          test "$try_asm_modules" != "yes" ; then
         gcry_cv_gcc_inline_asm_s390x_vx="n/a"
       else
         gcry_cv_gcc_inline_asm_s390x_vx=no
         if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
           AC_LINK_IFELSE([AC_LANG_PROGRAM(
           [[void testfunc(void)
             {
               asm volatile (".machine \"z13+vx\"\n\t"
                             "vx %%v0, %%v1, %%v31\n\t"
                             "verllf %%v11, %%v11, (16)(0)\n\t"
                             :
                             :
                             : "memory");
             }
             ]], [ testfunc(); ])],
           [gcry_cv_gcc_inline_asm_s390x_vx=yes])
         fi
       fi])
if test "$gcry_cv_gcc_inline_asm_s390x_vx" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X_VX,1,
     [Defined if inline assembler supports zSeries vector instructions])
fi

#######################################
#### Checks for library functions. ####
#######################################
AC_FUNC_VPRINTF
# We have replacements for these in src/missing-string.c
AC_CHECK_FUNCS(stpcpy strcasecmp)
# We have replacements for these in src/g10lib.h
AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise)
# Other checks
AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4)
AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog)
AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile getauxval elf_aux_info)
AC_CHECK_FUNCS(explicit_bzero explicit_memset getentropy)

GNUPG_CHECK_MLOCK

#
# Replacement functions.
#
AC_REPLACE_FUNCS([getpid clock])


#
# Check whether it is necessary to link against libdl.
# (Only relevant when the HMAC binary integrity check is enabled.)
#
DL_LIBS=""
if test "$use_hmac_binary_check" != no ; then
  _gcry_save_libs="$LIBS"
  LIBS=""
  AC_SEARCH_LIBS(dlopen, c dl,,,)
  DL_LIBS=$LIBS
  LIBS="$_gcry_save_libs"
fi
AC_SUBST(DL_LIBS)


#
# Check whether we can use Linux capabilities as requested.
#
if test "$use_capabilities" = "yes" ; then
use_capabilities=no
AC_CHECK_HEADERS(sys/capability.h)
if test "$ac_cv_header_sys_capability_h" = "yes" ; then
  AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1)
  if test "$ac_cv_lib_cap_cap_init" = "yes"; then
     AC_DEFINE(USE_CAPABILITIES,1,
               [define if capabilities should be used])
     LIBS="$LIBS -lcap"
     use_capabilities=yes
  fi
fi
if test "$use_capabilities" = "no" ; then
    AC_MSG_WARN([[
***
*** The use of capabilities on this system is not possible.
*** You need a recent Linux kernel and some patches:
***   fcaps-2.2.9-990610.patch      (kernel patch for 2.2.9)
***   fcap-module-990613.tar.gz     (kernel module)
***   libcap-1.92.tar.gz            (user mode library and utilities)
*** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN
*** set (filesystems menu). Be warned: This code is *really* ALPHA.
***]])
fi
fi

# Check whether a random device is available.
if test "$try_dev_random" = yes ; then
    AC_CACHE_CHECK(for random device, ac_cv_have_dev_random,
    [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then
      ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi])
    if test "$ac_cv_have_dev_random" = yes; then
        AC_DEFINE(HAVE_DEV_RANDOM,1,
                 [defined if the system supports a random device] )
    fi
else
    AC_MSG_CHECKING(for random device)
    ac_cv_have_dev_random=no
    AC_MSG_RESULT(has been disabled)
fi

# Figure out the random modules for this configuration.
# Preference order: getentropy() if available, then a readable random
# device, then a platform-specific or runtime-selected fallback.
if test "$random" = "default"; then

    # Select default value.
    if test "$ac_cv_func_getentropy" = yes; then
        random_modules="getentropy"
    elif test "$ac_cv_have_dev_random" = yes; then
        # Try Linuxish random device.
        random_modules="linux"
    else
        case "${host}" in
        *-*-mingw32ce*)
          # WindowsCE random device.
          random_modules="w32ce"
          ;;
        *-*-mingw32*|*-*-cygwin*)
          # Windows random device.
          random_modules="w32"
          ;;
        *)
          # Build everything, allow to select at runtime.
          random_modules="$auto_random_modules"
          ;;
        esac
    fi
else
    if test "$random" = "auto"; then
        # Build everything, allow to select at runtime.
        random_modules="$auto_random_modules"
    else
        random_modules="$random"
    fi
fi


#
# Other defines
#
# NOTE(review): mym4_isgit is presumably an m4 macro expanded at autogen
# time to a yes/no literal (it is not a shell variable) -- confirm in the
# part of the file that defines it.
if test mym4_isgit = "yes"; then
    AC_DEFINE(IS_DEVELOPMENT_VERSION,1,
              [Defined if this is not a regular release])
fi


AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes)


# This is handy for debugging so the compiler doesn't rearrange
# things and eliminate variables.
AC_ARG_ENABLE(optimization, AS_HELP_STRING([--disable-optimization], [disable compiler optimization]), [if test $enableval = no ; then CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'` fi]) AC_MSG_NOTICE([checking for cc features]) # CFLAGS mangling when using gcc. if test "$GCC" = yes; then AC_MSG_CHECKING([if gcc supports -fno-delete-null-pointer-checks]) _gcc_cflags_save=$CFLAGS CFLAGS="-fno-delete-null-pointer-checks" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -fno-delete-null-pointer-checks" fi CFLAGS="$CFLAGS -Wall" if test "$USE_MAINTAINER_MODE" = "yes"; then CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes" CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security" # If -Wno-missing-field-initializers is supported we can enable a # a bunch of really useful warnings. AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wno-missing-field-initializers" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wdeclaration-after-statement" CFLAGS="$CFLAGS -Wno-missing-field-initializers" CFLAGS="$CFLAGS -Wno-sign-compare" fi AC_MSG_CHECKING([if gcc supports -Wpointer-arith]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wpointer-arith" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -Wpointer-arith" fi fi fi # Check whether as(1) supports a noeexecstack feature. This test # includes an override option. 
CL_AS_NOEXECSTACK


AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION)
AC_SUBST(LIBGCRYPT_CONFIG_LIBS)
AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS)
AC_SUBST(LIBGCRYPT_CONFIG_HOST)
AC_SUBST(LIBGCRYPT_THREAD_MODULES)

AC_CONFIG_COMMANDS([gcrypt-conf],[[
chmod +x src/libgcrypt-config
]],[[
prefix=$prefix
exec_prefix=$exec_prefix
libdir=$libdir
datadir=$datadir
DATADIRNAME=$DATADIRNAME
]])

#####################
#### Conclusion. ####
#####################

# Check that requested feature can actually be used and define
# ENABLE_foo_SUPPORT macros.
if test x"$aesnisupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
    aesnisupport="no (unsupported by compiler)"
  fi
fi
if test x"$shaextsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
    shaextsupport="no (unsupported by compiler)"
  fi
fi
if test x"$pclmulsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
    pclmulsupport="no (unsupported by compiler)"
  fi
fi
if test x"$sse41support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
    sse41support="no (unsupported by compiler)"
  fi
fi
if test x"$avxsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
    avxsupport="no (unsupported by compiler)"
  fi
fi
if test x"$avx2support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
    avx2support="no (unsupported by compiler)"
  fi
fi
if test x"$avx512support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx512" != "yes" ; then
    avx512support="no (unsupported by compiler)"
  fi
fi
# NEON is usable if either the AArch32 or the AArch64 NEON asm check passed.
if test x"$neonsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
      neonsupport="no (unsupported by compiler)"
    fi
  fi
fi
# BUG FIX: this block used to clobber 'neonsupport' instead of
# 'armcryptosupport', so a compiler without ARMv8 crypto asm support
# wrongly disabled NEON and left ARM crypto enabled.
if test x"$armcryptosupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
      armcryptosupport="no (unsupported by compiler)"
    fi
  fi
fi

if test x"$aesnisupport" = xyes ; then
  AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
            [Enable support for Intel AES-NI instructions.])
fi
if test x"$shaextsupport" = xyes ; then
  AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1,
            [Enable support for Intel SHAEXT instructions.])
fi
if test x"$pclmulsupport" = xyes ; then
  AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
            [Enable support for Intel PCLMUL instructions.])
fi
if test x"$sse41support" = xyes ; then
  AC_DEFINE(ENABLE_SSE41_SUPPORT, 1,
            [Enable support for Intel SSE4.1 instructions.])
fi
if test x"$avxsupport" = xyes ; then
  AC_DEFINE(ENABLE_AVX_SUPPORT,1,
            [Enable support for Intel AVX instructions.])
fi
if test x"$avx2support" = xyes ; then
  AC_DEFINE(ENABLE_AVX2_SUPPORT,1,
            [Enable support for Intel AVX2 instructions.])
fi
if test x"$avx512support" = xyes ; then
  AC_DEFINE(ENABLE_AVX512_SUPPORT,1,
            [Enable support for Intel AVX512 instructions.])
fi
if test x"$neonsupport" = xyes ; then
  AC_DEFINE(ENABLE_NEON_SUPPORT,1,
            [Enable support for ARM NEON instructions.])
fi
if test x"$armcryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
            [Enable support for ARMv8 Crypto Extension instructions.])
fi
if test x"$ppccryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1,
            [Enable support for POWER 8 (PowerISA 2.07) crypto extension.])
fi
if test x"$jentsupport" = xyes ; then
  AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
            [Enable support for the jitter entropy collector.])
fi
if test x"$padlocksupport" = xyes ; then
  AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1,
            [Enable support for the PadLock engine.])
fi
if test x"$drngsupport" = xyes ; then
  AC_DEFINE(ENABLE_DRNG_SUPPORT, 1,
            [Enable support for Intel DRNG (RDRAND instruction).])
fi
if test x"$force_soft_hwfeatures" = xyes ; then
  AC_DEFINE(ENABLE_FORCE_SOFT_HWFEATURES, 1,
            [Enable forcing 'soft' HW feature bits on (for testing).])
fi

# Define conditional sources and config.h symbols depending on the
# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
LIST_MEMBER(arcfour, $enabled_ciphers)
if test "$found" = "1"; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
   AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS arcfour-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blowfish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
   AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(cast5, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
   AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(des, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
   AC_DEFINE(USE_DES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS des-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(aes, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo"
   AC_DEFINE(USE_AES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-amd64.lo"

         # Build with the SSSE3 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64-asm.lo"

         # Build with the VAES/AVX2 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-arm.lo"

         # Build with the ARMv8/AArch32 CE implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch32-ce.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aarch64.lo"

         # Build with the ARMv8/AArch64 CE implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch64-ce.lo"
      ;;
      powerpc64le-*-*)
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc9le.lo"
         if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" &&
            test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
            # Build with AES-GCM bulk implementation for P10
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-gcm-p10le.lo"
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-p10le.lo"
         fi
      ;;
      powerpc64-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
      ;;
      powerpc-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
      ;;
      s390x-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-s390x.lo"
      ;;
   esac

   case "$mpi_cpu_arch" in
     x86)
         # Build with the AES-NI implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aesni.lo"

         # Build with the Padlock implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-padlock.lo"
      ;;
   esac
fi

LIST_MEMBER(twofish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo"
   AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-amd64.lo"

         if test x"$avx2support" = xyes ; then
            # Build with the AVX2 implementation
            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-avx2-amd64.lo"
         fi
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-arm.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-aarch64.lo"
      ;;
   esac
fi

LIST_MEMBER(serpent, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo"
   AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the SSE2 implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-sse2-amd64.lo"
      ;;
   esac

   if test x"$avx2support" = xyes ; then
      # Build with the AVX2 implementation
      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx2-amd64.lo"
   fi

   if test x"$neonsupport" = xyes ; then
      # Build with the NEON implementation
      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-armv7-neon.lo"
   fi
fi

LIST_MEMBER(rfc2268, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo"
   AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included])
fi

LIST_MEMBER(seed, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo"
   AC_DEFINE(USE_SEED, 1, [Defined if this module should be included])
fi

LIST_MEMBER(camellia, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo"
   AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included])

   case "${host}" in
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-arm.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64.lo"
      ;;
   esac

   if test x"$avxsupport" = xyes ; then
      if test x"$aesnisupport" = xyes ; then
        # Build with the AES-NI/AVX implementation
        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx-amd64.lo"
      fi
   fi

   if test x"$avx2support" = xyes ; then
      if test x"$aesnisupport" = xyes ; then
        # Build with the AES-NI/AVX2 implementation
        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx2-amd64.lo"

        # Build with the VAES/AVX2 implementation
        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-vaes-avx2-amd64.lo"
      fi
   fi
fi

LIST_MEMBER(idea, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo"
   AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(salsa20, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo"
   AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-amd64.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-armv7-neon.lo"
   fi
fi

LIST_MEMBER(gost28147, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo"
   AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included])
fi

LIST_MEMBER(chacha20, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo"
   AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-ssse3.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx2.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-aarch64.lo"
      ;;
      powerpc64le-*-*)
         # Build with the ppc8 vector implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
      ;;
      powerpc64-*-*)
         # Build with the ppc8 vector implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
      ;;
      powerpc-*-*)
         # Build with the ppc8 vector implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
      ;;
      s390x-*-*)
         # Build with the s390x/zSeries vector implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-s390x.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-armv7-neon.lo"
   fi
fi

LIST_MEMBER(sm4, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS sm4.lo"
   AC_DEFINE(USE_SM4, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx-amd64.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx2-amd64.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aarch64.lo"
         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-armv8-aarch64-ce.lo"
      ;;
   esac
fi

LIST_MEMBER(dsa, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo"
   AC_DEFINE(USE_DSA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(rsa, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo"
   AC_DEFINE(USE_RSA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(elgamal, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo"
   AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included])
fi

LIST_MEMBER(ecc, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \
                          ecc.lo ecc-curves.lo ecc-misc.lo \
                          ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \
                          ecc-sm2.lo"
   AC_DEFINE(USE_ECC, 1, [Defined if this module should be included])
fi

LIST_MEMBER(crc, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo"
   AC_DEFINE(USE_CRC, 1, [Defined if this module should be included])

   case "${host}" in
      i?86-*-* | x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-intel-pclmul.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-ce.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-aarch64-ce.lo"
      ;;
      powerpc64le-*-*)
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
      ;;
      powerpc64-*-*)
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
      ;;
      powerpc-*-*)
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
      ;;
   esac
fi

LIST_MEMBER(gostr3411-94, $enabled_digests)
if test "$found" = "1" ; then
   # GOST R 34.11-94 internally uses GOST 28147-89
   LIST_MEMBER(gost28147, $enabled_ciphers)
   if test "$found" = "1" ; then
      GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo"
      AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included])
   fi
fi

LIST_MEMBER(stribog, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo"
   AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md2, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo"
   AC_DEFINE(USE_MD2, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md4, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo"
   AC_DEFINE(USE_MD4, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md5, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo"
   AC_DEFINE(USE_MD5, 1, [Defined if this module should be included])
fi

LIST_MEMBER(rmd160, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo"
   AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included])
fi

LIST_MEMBER(sha256, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo"
   AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ssse3-amd64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx-amd64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx2-bmi2-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch32-ce.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch64-ce.lo"
      ;;
      powerpc64le-*-*)
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
      ;;
      powerpc64-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
      ;;
      powerpc-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
      ;;
   esac

   case "$mpi_cpu_arch" in
     x86)
       # Build with the SHAEXT implementation
       GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-intel-shaext.lo"
     ;;
   esac
fi

LIST_MEMBER(sha512, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo"
   AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-amd64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx-amd64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx2-bmi2-amd64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx512-amd64.lo"
      ;;
      i?86-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-i386.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo"
      ;;
      powerpc64le-*-*)
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
      ;;
      powerpc64-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
      ;;
      powerpc-*-*)
         # Big-Endian.
         # Build with the crypto extension implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv7-neon.lo"
   fi
fi

LIST_MEMBER(sha3, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo"
   AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         :
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-armv7-neon.lo"
   fi
fi

LIST_MEMBER(tiger, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo"
   AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included])
fi

LIST_MEMBER(whirlpool, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo"
   AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS whirlpool-sse2-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blake2, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo"
   AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx2.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx.lo"
      ;;
   esac
fi

LIST_MEMBER(sm3, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"
   AC_DEFINE(USE_SM3, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-aarch64.lo"
         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-armv8-aarch64-ce.lo"
      ;;
   esac
fi

# SHA-1 needs to be included always for example because it is used by
# random-csprng.c.
GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo"
AC_DEFINE(USE_SHA1, 1, [Defined if this module should be included])

case "${host}" in
  x86_64-*-*)
    # Build with the assembly implementation
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-ssse3-amd64.lo"
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-amd64.lo"
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-bmi2-amd64.lo"
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx2-bmi2-amd64.lo"
  ;;
  arm*-*-*)
    # Build with the assembly implementation
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv7-neon.lo"
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch32-ce.lo"
  ;;
  aarch64-*-*)
    # Build with the assembly implementation
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch64-ce.lo"
  ;;
esac

case "$mpi_cpu_arch" in
  x86)
    # Build with the SHAEXT implementation
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-intel-shaext.lo"
  ;;
esac

# Arch specific GCM implementations
case "${host}" in
  i?86-*-* | x86_64-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-intel-pclmul.lo"
  ;;
  arm*-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv7-neon.lo"
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch32-ce.lo"
  ;;
  aarch64-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch64-ce.lo"
  ;;
  powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo"
  ;;
esac

# Arch specific MAC implementations
# (Fixed: stray '+' diff markers left in the x86_64 branch have been
# removed; they would have been syntax errors in the configure script.)
case "${host}" in
  s390x-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-s390x.lo"
  ;;
  x86_64-*-*)
    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-amd64-avx512.lo"
  ;;
esac

LIST_MEMBER(scrypt, $enabled_kdfs)
if test "$found" = "1" ; then
   GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"
   AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included])
fi

LIST_MEMBER(getentropy, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndgetentropy.lo"
   AC_DEFINE(USE_RNDGETENTROPY, 1, [Defined if the getentropy RNG should be used.])
fi

LIST_MEMBER(linux, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndoldlinux.lo"
   AC_DEFINE(USE_RNDOLDLINUX, 1, [Defined if the /dev/random RNG should be used.])
fi

LIST_MEMBER(unix, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo"
   AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.])
fi

LIST_MEMBER(egd, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo"
   AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.])
fi

LIST_MEMBER(w32, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo"
   AC_DEFINE(USE_RNDW32, 1,
             [Defined if the Windows specific RNG should be used.])
fi

LIST_MEMBER(w32ce, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo"
   AC_DEFINE(USE_RNDW32CE, 1,
             [Defined if the WindowsCE specific RNG should be used.])
fi

if test "$try_asm_modules" = yes ; then
   # Build with assembly implementations
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS $GCRYPT_ASM_CIPHERS"
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS $GCRYPT_ASM_DIGESTS"
fi

AC_SUBST([GCRYPT_CIPHERS])
AC_SUBST([GCRYPT_PUBKEY_CIPHERS])
AC_SUBST([GCRYPT_DIGESTS])
AC_SUBST([GCRYPT_KDFS])
AC_SUBST([GCRYPT_RANDOM])

AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers)
AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers)
AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests)

# For printing the configuration we need a colon separated list of
# algorithm names.
tmp=`echo "$enabled_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp", [List of available cipher algorithms]) tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp", [List of available public key cipher algorithms]) tmp=`echo "$enabled_digests" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp", [List of available digest algorithms]) tmp=`echo "$enabled_kdfs" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp", [List of available KDF algorithms]) # # Define conditional sources depending on the used hardware platform. # Note that all possible modules must also be listed in # src/Makefile.am (EXTRA_libgcrypt_la_SOURCES). # GCRYPT_HWF_MODULES= case "$mpi_cpu_arch" in x86) AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo" ;; alpha) AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms]) ;; sparc) AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms]) ;; mips) AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms]) ;; m68k) AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms]) ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; aarch64) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; s390x) AC_DEFINE(HAVE_CPU_ARCH_S390X, 1, [Defined for s390x/zSeries platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-s390x.lo" ;; esac AC_SUBST([GCRYPT_HWF_MODULES]) # # Option to disable building of doc file # build_doc=yes AC_ARG_ENABLE([doc], AS_HELP_STRING([--disable-doc], [do not build the documentation]), build_doc=$enableval, build_doc=yes) AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno]) # # Provide information about the build. 
# BUILD_REVISION="mym4_revision" AC_SUBST(BUILD_REVISION) AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION", [GIT commit id revision used to build this package]) changequote(,)dnl BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'` changequote([,])dnl BUILD_VERSION="${BUILD_VERSION}mym4_revision_dec" BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,` AC_SUBST(BUILD_VERSION) AC_SUBST(BUILD_FILEVERSION) AC_ARG_ENABLE([build-timestamp], AS_HELP_STRING([--enable-build-timestamp], [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]), [if test "$enableval" = "yes"; then BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date` else BUILD_TIMESTAMP="$enableval" fi], [BUILD_TIMESTAMP=""]) AC_SUBST(BUILD_TIMESTAMP) AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP", [The time this package was configured for a build]) # And create the files. AC_CONFIG_FILES([ Makefile m4/Makefile compat/Makefile mpi/Makefile cipher/Makefile random/Makefile doc/Makefile src/Makefile src/gcrypt.h src/libgcrypt-config src/libgcrypt.pc src/versioninfo.rc tests/Makefile ]) AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g]) AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf]) AC_OUTPUT detection_module="${GCRYPT_HWF_MODULES%.lo}" test -n "$detection_module" || detection_module="none" # Give some feedback GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:]) GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)]) GCRY_MSG_SHOW([Hardware detection module:],[$detection_module]) GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers]) GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests]) GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs]) GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers]) GCRY_MSG_SHOW([Random number generator: ],[$random]) GCRY_MSG_SHOW([Try 
using jitter entropy: ],[$jentsupport]) GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities]) GCRY_MSG_SHOW([FIPS module version: ],[$fips_module_version]) GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport]) GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport]) GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport]) GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport]) GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support]) GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport]) GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using Intel AVX512: ],[$avx512support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then cat <