diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 2acd7cb3..19420bf4 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,149 +1,197 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c \ cipher-cfb.c \ cipher-ofb.c \ cipher-ctr.c \ cipher-aeswrap.c \ cipher-ccm.c \ cipher-cmac.c \ cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ cipher-poly1305.c \ cipher-ocb.c \ cipher-xts.c \ cipher-eax.c \ cipher-selftest.c cipher-selftest.h \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ hmac-tests.c \ bithelp.h \ bufhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ asm-common-amd64.h \ asm-common-aarch64.h \ asm-poly1305-amd64.h \ arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ chacha20-armv7-neon.S chacha20-aarch64.S \ crc.c crc-intel-pclmul.c crc-armv8-ce.c \ crc-armv8-aarch64-ce.S \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ rijndael.c rijndael-internal.h rijndael-tables.h \ rijndael-aesni.c rijndael-padlock.c \ rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S \ serpent-avx2-amd64.S serpent-armv7-neon.S \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha256-intel-shaext.c \ sha512.c 
sha512-ssse3-amd64.S sha512-avx-amd64.S \ sha512-avx2-bmi2-amd64.S \ sha512-armv7-neon.S sha512-arm.S \ sm3.c \ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ blake2.c \ blake2b-amd64-avx2.S blake2s-amd64-avx.S gost28147.lo: gost-sb.h gost-sb.h: gost-s-box ./gost-s-box $@ gost-s-box: gost-s-box.c $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. -tiger.o: $(srcdir)/tiger.c - `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` +tiger.o: $(srcdir)/tiger.c Makefile + `echo $(COMPILE) -c $< | $(o_flag_munging) ` -tiger.lo: $(srcdir)/tiger.c - `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` +tiger.lo: $(srcdir)/tiger.c Makefile + `echo $(LTCOMPILE) -c $< | $(o_flag_munging) ` + + +# We need to disable instrumentation for these modules as they use cc as +# thin assembly front-end and do not tolerate in-between function calls +# inserted by compiler as those functions may clobber the XMM registers. +if ENABLE_INSTRUMENTATION_MUNGING +instrumentation_munging = sed \ + -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ + -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' +else +instrumentation_munging = cat +endif + +rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c index 8e109ba3..28165c65 100644 --- a/cipher/cipher-gcm-intel-pclmul.c +++ b/cipher/cipher-gcm-intel-pclmul.c @@ -1,704 +1,712 @@ /* cipher-gcm-intel-pclmul.c - Intel PCLMUL accelerated Galois Counter Mode * implementation * Copyright (C) 2013-2014,2019 Jussi Kivilinna * * This file is part of Libgcrypt. 
* * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #include #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "./cipher-internal.h" #ifdef GCM_USE_INTEL_PCLMUL #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE + + /* Intel PCLMUL ghash based on white paper: "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis. */ -static inline void reduction(void) +static ASM_FUNC_ATTR_INLINE void reduction(void) { /* input: */ asm volatile (/* first phase of the reduction */ "movdqa %%xmm3, %%xmm6\n\t" "movdqa %%xmm3, %%xmm5\n\t" "psllq $1, %%xmm6\n\t" /* packed right shifting << 63 */ "pxor %%xmm3, %%xmm6\n\t" "psllq $57, %%xmm5\n\t" /* packed right shifting << 57 */ "psllq $62, %%xmm6\n\t" /* packed right shifting << 62 */ "pxor %%xmm5, %%xmm6\n\t" /* xor the shifted versions */ "pshufd $0x6a, %%xmm6, %%xmm5\n\t" "pshufd $0xae, %%xmm6, %%xmm6\n\t" "pxor %%xmm5, %%xmm3\n\t" /* first phase of the reduction complete */ /* second phase of the reduction */ "pxor %%xmm3, %%xmm1\n\t" /* xor the shifted versions */ "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 1 */ "pxor %%xmm3, %%xmm6\n\t" "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 2 */ "pxor %%xmm3, %%xmm1\n\t" "psrlq $5, %%xmm3\n\t" /* packed left shifting >> 7 */ "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm6, %%xmm1\n\t" /* the result is in xmm1 */ ::: "memory" ); } -static inline void gfmul_pclmul(void) +static ASM_FUNC_ATTR_INLINE void gfmul_pclmul(void) { /* Input: XMM0 and XMM1, Output: XMM1. Input XMM0 stays unmodified. Input must be converted to little-endian. */ asm volatile (/* gfmul, xmm0 has operator a and xmm1 has operator b. 
*/ "pshufd $78, %%xmm0, %%xmm2\n\t" "pshufd $78, %%xmm1, %%xmm4\n\t" "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */ "movdqa %%xmm0, %%xmm3\n\t" "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */ "pclmulqdq $17, %%xmm0, %%xmm1\n\t" /* xmm6 holds a1*b1 */ "movdqa %%xmm3, %%xmm5\n\t" "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */ "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ "movdqa %%xmm4, %%xmm5\n\t" "psrldq $8, %%xmm4\n\t" "pslldq $8, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the carry-less multiplication of xmm0 by xmm1 */ ::: "memory" ); reduction(); } -static inline void gfmul_pclmul_aggr4(const void *buf, const void *h_1, - const void *h_table, - const unsigned char *be_mask) +static ASM_FUNC_ATTR_INLINE void +gfmul_pclmul_aggr4(const void *buf, const void *h_1, const void *h_table, + const unsigned char *be_mask) { /* Input: Hash: XMM1 Output: Hash: XMM1 */ asm volatile (/* perform clmul and merge results... */ "movdqu 2*16(%[h_table]), %%xmm2\n\t" /* Load H4 */ "movdqu 0*16(%[buf]), %%xmm5\n\t" "pshufb %[be_mask], %%xmm5\n\t" /* be => le */ "pxor %%xmm5, %%xmm1\n\t" "pshufd $78, %%xmm2, %%xmm5\n\t" "pshufd $78, %%xmm1, %%xmm4\n\t" "pxor %%xmm2, %%xmm5\n\t" /* xmm5 holds 4:a0+a1 */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 4:b0+b1 */ "movdqa %%xmm2, %%xmm3\n\t" "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 4:a0*b0 */ "pclmulqdq $17, %%xmm2, %%xmm1\n\t" /* xmm1 holds 4:a1*b1 */ "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 4:(a0+a1)*(b0+b1) */ "movdqu 1*16(%[h_table]), %%xmm5\n\t" /* Load H3 */ "movdqu 1*16(%[buf]), %%xmm2\n\t" "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ "pshufd $78, %%xmm5, %%xmm0\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 3:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ "movdqa %%xmm5, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ "movdqu 2*16(%[buf]), %%xmm5\n\t" "pshufb %[be_mask], %%xmm5\n\t" /* be => le */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4:(a0+a1)*(b0+b1) */ "movdqu 0*16(%[h_table]), %%xmm2\n\t" /* Load H2 */ "pshufd $78, %%xmm2, %%xmm0\n\t" "pshufd $78, %%xmm5, %%xmm7\n\t" "pxor %%xmm2, %%xmm0\n\t" /* xmm0 holds 2:a0+a1 */ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 2:b0+b1 */ "movdqa %%xmm2, %%xmm6\n\t" "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 2:a0*b0 */ "pclmulqdq $17, %%xmm2, %%xmm5\n\t" /* xmm5 holds 2:a1*b1 */ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 2:(a0+a1)*(b0+b1) */ "movdqu 3*16(%[buf]), %%xmm2\n\t" "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ : : [buf] "r" (buf), [h_table] "r" (h_table), [be_mask] "m" (*be_mask) : "memory" ); asm volatile ("pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4:a0*b0 */ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4:(a0+a1)*(b0+b1) */ "movdqu %[h_1], %%xmm5\n\t" /* Load H1 */ "pshufd $78, %%xmm5, %%xmm0\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 1:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 1:b0+b1 */ "movdqa %%xmm5, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 1:a0*b0 */ "pclmulqdq 
$17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 1:a1*b1 */ "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 1:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+4:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+4:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+4:(a0+a1)*(b0+b1) */ /* aggregated reduction... */ "movdqa %%xmm3, %%xmm5\n\t" "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ "movdqa %%xmm4, %%xmm5\n\t" "psrldq $8, %%xmm4\n\t" "pslldq $8, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the carry-less multiplication of xmm0 by xmm1 */ : : [h_1] "m" (*(const unsigned char *)h_1) : "memory" ); reduction(); } #ifdef __x86_64__ -static inline void gfmul_pclmul_aggr8(const void *buf, const void *h_table) +static ASM_FUNC_ATTR_INLINE void +gfmul_pclmul_aggr8(const void *buf, const void *h_table) { /* Input: H¹: XMM0 bemask: XMM15 Hash: XMM1 Output: Hash: XMM1 Inputs XMM0 and XMM15 stays unmodified. */ asm volatile (/* Load H6, H7, H8. */ "movdqu 6*16(%[h_table]), %%xmm10\n\t" "movdqu 5*16(%[h_table]), %%xmm9\n\t" "movdqu 4*16(%[h_table]), %%xmm8\n\t" /* perform clmul and merge results... */ "movdqu 0*16(%[buf]), %%xmm5\n\t" "movdqu 1*16(%[buf]), %%xmm2\n\t" "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ "pxor %%xmm5, %%xmm1\n\t" "pshufd $78, %%xmm10, %%xmm5\n\t" "pshufd $78, %%xmm1, %%xmm4\n\t" "pxor %%xmm10, %%xmm5\n\t" /* xmm5 holds 8:a0+a1 */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 8:b0+b1 */ "movdqa %%xmm10, %%xmm3\n\t" "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 8:a0*b0 */ "pclmulqdq $17, %%xmm10, %%xmm1\n\t" /* xmm1 holds 8:a1*b1 */ "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 8:(a0+a1)*(b0+b1) */ "pshufd $78, %%xmm9, %%xmm11\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 7:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 7:b0+b1 */ "movdqa %%xmm9, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 7:a0*b0 */ "pclmulqdq $17, %%xmm9, %%xmm2\n\t" /* xmm2 holds 7:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 7:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 7+8:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 7+8:(a0+a1)*(b0+b1) */ "movdqu 2*16(%[buf]), %%xmm5\n\t" "movdqu 3*16(%[buf]), %%xmm2\n\t" "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ "pshufd $78, %%xmm8, %%xmm11\n\t" "pshufd $78, %%xmm5, %%xmm7\n\t" "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 6:a0+a1 */ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 6:b0+b1 */ "movdqa %%xmm8, %%xmm6\n\t" "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 6:a0*b0 */ "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 6:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 6:(a0+a1)*(b0+b1) */ /* Load H3, H4, H5. 
*/ "movdqu 3*16(%[h_table]), %%xmm10\n\t" "movdqu 2*16(%[h_table]), %%xmm9\n\t" "movdqu 1*16(%[h_table]), %%xmm8\n\t" "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 6+7+8:a0*b0 */ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 6+7+8:(a0+a1)*(b0+b1) */ "pshufd $78, %%xmm10, %%xmm11\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm10, %%xmm11\n\t" /* xmm11 holds 5:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 5:b0+b1 */ "movdqa %%xmm10, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 5:a0*b0 */ "pclmulqdq $17, %%xmm10, %%xmm2\n\t" /* xmm2 holds 5:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 5:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 5+6+7+8:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 5+6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 5+6+7+8:(a0+a1)*(b0+b1) */ "movdqu 4*16(%[buf]), %%xmm5\n\t" "movdqu 5*16(%[buf]), %%xmm2\n\t" "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ "pshufd $78, %%xmm9, %%xmm11\n\t" "pshufd $78, %%xmm5, %%xmm7\n\t" "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */ "movdqa %%xmm9, %%xmm6\n\t" "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */ "pclmulqdq $17, %%xmm9, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 4+5+6+7+8:a0*b0 */ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 4+5+6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 4+5+6+7+8:(a0+a1)*(b0+b1) */ "pshufd $78, %%xmm8, %%xmm11\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ "movdqa %%xmm8, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ "pclmulqdq $17, %%xmm8, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ "movdqu 0*16(%[h_table]), %%xmm8\n\t" /* Load H2 */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4+5+6+7+8:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4+5+6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4+5+6+7+8:(a0+a1)*(b0+b1) */ "movdqu 6*16(%[buf]), %%xmm5\n\t" "movdqu 7*16(%[buf]), %%xmm2\n\t" "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ "pshufd $78, %%xmm8, %%xmm11\n\t" "pshufd $78, %%xmm5, %%xmm7\n\t" "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */ "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */ "movdqa %%xmm8, %%xmm6\n\t" "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */ "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4+5+6+7+8:a0*b0 */ "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4+5+6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4+5+6+7+8:(a0+a1)*(b0+b1) */ "pshufd $78, %%xmm0, %%xmm11\n\t" "pshufd $78, %%xmm2, %%xmm7\n\t" "pxor %%xmm0, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */ "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ "movdqa %%xmm0, %%xmm6\n\t" "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ "pclmulqdq $17, %%xmm0, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+3+4+5+6+7+8:a0*b0 */ "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+3+4+5+6+7+8:a1*b1 */ "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 
1+2+3+3+4+5+6+7+8:(a0+a1)*(b0+b1) */ /* aggregated reduction... */ "movdqa %%xmm3, %%xmm5\n\t" "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ "movdqa %%xmm4, %%xmm5\n\t" "psrldq $8, %%xmm4\n\t" "pslldq $8, %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the carry-less multiplication of xmm0 by xmm1 */ : : [buf] "r" (buf), [h_table] "r" (h_table) : "memory" ); reduction(); } #endif -static inline void gcm_lsh(void *h, unsigned int hoffs) +static ASM_FUNC_ATTR_INLINE void gcm_lsh(void *h, unsigned int hoffs) { static const u64 pconst[2] __attribute__ ((aligned (16))) = { U64_C(0x0000000000000001), U64_C(0xc200000000000000) }; asm volatile ("movdqu (%[h]), %%xmm2\n\t" "pshufd $0xff, %%xmm2, %%xmm3\n\t" "movdqa %%xmm2, %%xmm4\n\t" "psrad $31, %%xmm3\n\t" "pslldq $8, %%xmm4\n\t" "pand %[pconst], %%xmm3\n\t" "paddq %%xmm2, %%xmm2\n\t" "psrlq $63, %%xmm4\n\t" "pxor %%xmm3, %%xmm2\n\t" "pxor %%xmm4, %%xmm2\n\t" "movdqu %%xmm2, (%[h])\n\t" : : [pconst] "m" (pconst), [h] "r" ((byte *)h + hoffs) : "memory" ); } -void +void ASM_FUNC_ATTR _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; #if defined(__x86_64__) && defined(__WIN64__) char win64tmp[10 * 16]; /* XMM6-XMM15 need to be restored after use. */ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" "movdqu %%xmm7, 1*16(%0)\n\t" "movdqu %%xmm8, 2*16(%0)\n\t" "movdqu %%xmm9, 3*16(%0)\n\t" "movdqu %%xmm10, 4*16(%0)\n\t" "movdqu %%xmm11, 5*16(%0)\n\t" "movdqu %%xmm12, 6*16(%0)\n\t" "movdqu %%xmm13, 7*16(%0)\n\t" "movdqu %%xmm14, 8*16(%0)\n\t" "movdqu %%xmm15, 9*16(%0)\n\t" : : "r" (win64tmp) : "memory" ); #endif /* Swap endianness of hsub. 
*/ asm volatile ("movdqu (%[key]), %%xmm0\n\t" "pshufb %[be_mask], %%xmm0\n\t" "movdqu %%xmm0, (%[key])\n\t" : : [key] "r" (c->u_mode.gcm.u_ghash_key.key), [be_mask] "m" (*be_mask) : "memory"); gcm_lsh(c->u_mode.gcm.u_ghash_key.key, 0); /* H <<< 1 */ asm volatile ("movdqa %%xmm0, %%xmm1\n\t" "movdqu (%[key]), %%xmm0\n\t" /* load H <<< 1 */ : : [key] "r" (c->u_mode.gcm.u_ghash_key.key) : "memory"); gfmul_pclmul (); /* H<<<1•H => H² */ asm volatile ("movdqu %%xmm1, 0*16(%[h_table])\n\t" "movdqa %%xmm1, %%xmm7\n\t" : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gcm_lsh(c->u_mode.gcm.gcm_table, 0 * 16); /* H² <<< 1 */ gfmul_pclmul (); /* H<<<1•H² => H³ */ asm volatile ("movdqa %%xmm7, %%xmm0\n\t" "movdqu %%xmm1, 1*16(%[h_table])\n\t" "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */ : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gfmul_pclmul (); /* H²<<<1•H² => H⁴ */ asm volatile ("movdqu %%xmm1, 2*16(%[h_table])\n\t" "movdqa %%xmm1, %%xmm0\n\t" "movdqu (%[key]), %%xmm1\n\t" /* load H <<< 1 */ : : [h_table] "r" (c->u_mode.gcm.gcm_table), [key] "r" (c->u_mode.gcm.u_ghash_key.key) : "memory"); gcm_lsh(c->u_mode.gcm.gcm_table, 1 * 16); /* H³ <<< 1 */ gcm_lsh(c->u_mode.gcm.gcm_table, 2 * 16); /* H⁴ <<< 1 */ #ifdef __x86_64__ gfmul_pclmul (); /* H<<<1•H⁴ => H⁵ */ asm volatile ("movdqu %%xmm1, 3*16(%[h_table])\n\t" "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */ : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gfmul_pclmul (); /* H²<<<1•H⁴ => H⁶ */ asm volatile ("movdqu %%xmm1, 4*16(%[h_table])\n\t" "movdqu 1*16(%[h_table]), %%xmm1\n\t" /* load H³ <<< 1 */ : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gfmul_pclmul (); /* H³<<<1•H⁴ => H⁷ */ asm volatile ("movdqu %%xmm1, 5*16(%[h_table])\n\t" "movdqu 2*16(%[h_table]), %%xmm1\n\t" /* load H⁴ <<< 1 */ : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gfmul_pclmul (); /* H³<<<1•H⁴ => H⁸ */ asm volatile ("movdqu %%xmm1, 6*16(%[h_table])\n\t" : : [h_table] "r" (c->u_mode.gcm.gcm_table) : "memory"); gcm_lsh(c->u_mode.gcm.gcm_table, 3 * 16); /* H⁵ <<< 1 */ gcm_lsh(c->u_mode.gcm.gcm_table, 4 * 16); /* H⁶ <<< 1 */ gcm_lsh(c->u_mode.gcm.gcm_table, 5 * 16); /* H⁷ <<< 1 */ gcm_lsh(c->u_mode.gcm.gcm_table, 6 * 16); /* H⁸ <<< 1 */ #ifdef __WIN64__ /* Clear/restore used registers. */ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" "pxor %%xmm2, %%xmm2\n\t" "pxor %%xmm3, %%xmm3\n\t" "pxor %%xmm4, %%xmm4\n\t" "pxor %%xmm5, %%xmm5\n\t" "movdqu 0*16(%0), %%xmm6\n\t" "movdqu 1*16(%0), %%xmm7\n\t" "movdqu 2*16(%0), %%xmm8\n\t" "movdqu 3*16(%0), %%xmm9\n\t" "movdqu 4*16(%0), %%xmm10\n\t" "movdqu 5*16(%0), %%xmm11\n\t" "movdqu 6*16(%0), %%xmm12\n\t" "movdqu 7*16(%0), %%xmm13\n\t" "movdqu 8*16(%0), %%xmm14\n\t" "movdqu 9*16(%0), %%xmm15\n\t" : : "r" (win64tmp) : "memory" ); #else /* Clear used registers. 
*/ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" "pxor %%xmm2, %%xmm2\n\t" "pxor %%xmm3, %%xmm3\n\t" "pxor %%xmm4, %%xmm4\n\t" "pxor %%xmm5, %%xmm5\n\t" "pxor %%xmm6, %%xmm6\n\t" "pxor %%xmm7, %%xmm7\n\t" "pxor %%xmm8, %%xmm8\n\t" "pxor %%xmm9, %%xmm9\n\t" "pxor %%xmm10, %%xmm10\n\t" "pxor %%xmm11, %%xmm11\n\t" "pxor %%xmm12, %%xmm12\n\t" "pxor %%xmm13, %%xmm13\n\t" "pxor %%xmm14, %%xmm14\n\t" "pxor %%xmm15, %%xmm15\n\t" ::: "memory" ); #endif #endif } -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; #if defined(__x86_64__) && defined(__WIN64__) char win64tmp[10 * 16]; #endif if (nblocks == 0) return 0; #if defined(__x86_64__) && defined(__WIN64__) /* XMM6-XMM15 need to be restored after use. */ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" "movdqu %%xmm7, 1*16(%0)\n\t" "movdqu %%xmm8, 2*16(%0)\n\t" "movdqu %%xmm9, 3*16(%0)\n\t" "movdqu %%xmm10, 4*16(%0)\n\t" "movdqu %%xmm11, 5*16(%0)\n\t" "movdqu %%xmm12, 6*16(%0)\n\t" "movdqu %%xmm13, 7*16(%0)\n\t" "movdqu %%xmm14, 8*16(%0)\n\t" "movdqu %%xmm15, 9*16(%0)\n\t" : : "r" (win64tmp) : "memory" ); #endif /* Preload hash. */ asm volatile ("movdqa %[be_mask], %%xmm7\n\t" "movdqu %[hash], %%xmm1\n\t" "pshufb %%xmm7, %%xmm1\n\t" /* be => le */ : : [hash] "m" (*result), [be_mask] "m" (*be_mask) : "memory" ); #ifdef __x86_64__ if (nblocks >= 8) { /* Preload H1. */ asm volatile ("movdqa %%xmm7, %%xmm15\n\t" "movdqa %[h_1], %%xmm0\n\t" : : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key) : "memory" ); while (nblocks >= 8) { gfmul_pclmul_aggr8 (buf, c->u_mode.gcm.gcm_table); buf += 8 * blocksize; nblocks -= 8; } #ifndef __WIN64__ /* Clear used x86-64/XMM registers. */ asm volatile( "pxor %%xmm8, %%xmm8\n\t" "pxor %%xmm9, %%xmm9\n\t" "pxor %%xmm10, %%xmm10\n\t" "pxor %%xmm11, %%xmm11\n\t" "pxor %%xmm12, %%xmm12\n\t" "pxor %%xmm13, %%xmm13\n\t" "pxor %%xmm14, %%xmm14\n\t" "pxor %%xmm15, %%xmm15\n\t" ::: "memory" ); #endif } #endif while (nblocks >= 4) { gfmul_pclmul_aggr4 (buf, c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table, be_mask); buf += 4 * blocksize; nblocks -= 4; } if (nblocks) { /* Preload H1. */ asm volatile ("movdqa %[h_1], %%xmm0\n\t" : : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key) : "memory" ); while (nblocks) { asm volatile ("movdqu %[buf], %%xmm2\n\t" "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ "pxor %%xmm2, %%xmm1\n\t" : : [buf] "m" (*buf), [be_mask] "m" (*be_mask) : "memory" ); gfmul_pclmul (); buf += blocksize; nblocks--; } } /* Store hash. */ asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */ "movdqu %%xmm1, %[hash]\n\t" : [hash] "=m" (*result) : [be_mask] "m" (*be_mask) : "memory" ); #if defined(__x86_64__) && defined(__WIN64__) /* Clear/restore used registers. */ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" "pxor %%xmm2, %%xmm2\n\t" "pxor %%xmm3, %%xmm3\n\t" "pxor %%xmm4, %%xmm4\n\t" "pxor %%xmm5, %%xmm5\n\t" "movdqu 0*16(%0), %%xmm6\n\t" "movdqu 1*16(%0), %%xmm7\n\t" "movdqu 2*16(%0), %%xmm8\n\t" "movdqu 3*16(%0), %%xmm9\n\t" "movdqu 4*16(%0), %%xmm10\n\t" "movdqu 5*16(%0), %%xmm11\n\t" "movdqu 6*16(%0), %%xmm12\n\t" "movdqu 7*16(%0), %%xmm13\n\t" "movdqu 8*16(%0), %%xmm14\n\t" "movdqu 9*16(%0), %%xmm15\n\t" : : "r" (win64tmp) : "memory" ); #else /* Clear used registers. 
*/ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" "pxor %%xmm2, %%xmm2\n\t" "pxor %%xmm3, %%xmm3\n\t" "pxor %%xmm4, %%xmm4\n\t" "pxor %%xmm5, %%xmm5\n\t" "pxor %%xmm6, %%xmm6\n\t" "pxor %%xmm7, %%xmm7\n\t" ::: "memory" ); #endif return 0; } #if __clang__ # pragma clang attribute pop #endif #endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/cipher/crc-intel-pclmul.c b/cipher/crc-intel-pclmul.c index 482b260b..8c8b1915 100644 --- a/cipher/crc-intel-pclmul.c +++ b/cipher/crc-intel-pclmul.c @@ -1,932 +1,939 @@ /* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * */ #include #include #include #include #include "g10lib.h" #include "bithelp.h" #include "bufhelp.h" #if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \ __GNUC__ >= 4 && \ ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE + + #define ALIGNED_16 __attribute__ ((aligned (16))) struct u16_unaligned_s { u16 a; } __attribute__((packed, aligned (1), may_alias)); /* Constants structure for generic reflected/non-reflected CRC32 CLMUL * functions. */ struct crc32_consts_s { /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */ u64 k[6]; /* my_p: { floor(x^64 / P(x)), P(x) } */ u64 my_p[2]; }; /* CLMUL constants for CRC32 and CRC32RFC1510. */ static const struct crc32_consts_s crc32_consts ALIGNED_16 = { { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */ U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */ U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */ U64_C(0x163cd6124), 0 /* y = 2 */ }, { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */ U64_C(0x1f7011641), U64_C(0x1db710641) } }; /* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */ static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 = { { /* k[6] = x^(32*y) mod P(x) << 32*/ U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */ U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */ U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */ }, { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */ U64_C(0x1f845fe24), U64_C(0x1864cfb00) } }; /* Common constants for CRC32 algorithms. 
*/ static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }; static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }; static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16]; static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }; static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 = { { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */ { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) }, { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) }, { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) }, { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) }, { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) }, { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */ }; static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 = { { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */ { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) }, { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */ }; /* PCLMUL functions for reflected CRC32. */ -static inline void +static ASM_FUNC_ATTR_INLINE void crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, const struct crc32_consts_s *consts) { if (inlen >= 8 * 16) { asm volatile ("movd %[crc], %%xmm4\n\t" "movdqu %[inbuf_0], %%xmm0\n\t" "movdqu %[inbuf_1], %%xmm1\n\t" "movdqu %[inbuf_2], %%xmm2\n\t" "movdqu %[inbuf_3], %%xmm3\n\t" "pxor %%xmm4, %%xmm0\n\t" : : [inbuf_0] "m" (inbuf[0 * 16]), [inbuf_1] "m" (inbuf[1 * 16]), [inbuf_2] "m" (inbuf[2 * 16]), [inbuf_3] "m" (inbuf[3 * 16]), [crc] "m" (*pcrc) ); inbuf += 4 * 16; inlen -= 4 * 16; asm volatile ("movdqa %[k1k2], %%xmm4\n\t" : : [k1k2] "m" (consts->k[1 - 1]) ); /* Fold by 4. 
*/ while (inlen >= 4 * 16) { asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" "movdqa %%xmm0, %%xmm6\n\t" "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm6, %%xmm0\n\t" "movdqu %[inbuf_1], %%xmm5\n\t" "movdqa %%xmm1, %%xmm6\n\t" "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t" "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm1\n\t" "pxor %%xmm6, %%xmm1\n\t" "movdqu %[inbuf_2], %%xmm5\n\t" "movdqa %%xmm2, %%xmm6\n\t" "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t" "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm2\n\t" "pxor %%xmm6, %%xmm2\n\t" "movdqu %[inbuf_3], %%xmm5\n\t" "movdqa %%xmm3, %%xmm6\n\t" "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t" "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm6, %%xmm3\n\t" : : [inbuf_0] "m" (inbuf[0 * 16]), [inbuf_1] "m" (inbuf[1 * 16]), [inbuf_2] "m" (inbuf[2 * 16]), [inbuf_3] "m" (inbuf[3 * 16]) ); inbuf += 4 * 16; inlen -= 4 * 16; } asm volatile ("movdqa %[k3k4], %%xmm6\n\t" "movdqa %[my_p], %%xmm5\n\t" : : [k3k4] "m" (consts->k[3 - 1]), [my_p] "m" (consts->my_p[0]) ); /* Fold 4 to 1. */ asm volatile ("movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" "pxor %%xmm1, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" "pxor %%xmm3, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" : : ); } else { asm volatile ("movd %[crc], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "movdqa %[k3k4], %%xmm6\n\t" "pxor %%xmm1, %%xmm0\n\t" "movdqa %[my_p], %%xmm5\n\t" : : [inbuf] "m" (*inbuf), [crc] "m" (*pcrc), [k3k4] "m" (consts->k[3 - 1]), [my_p] "m" (consts->my_p[0]) ); inbuf += 16; inlen -= 16; } /* Fold by 1. */ if (inlen >= 16) { while (inlen >= 16) { /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ asm volatile ("movdqu %[inbuf], %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" : : [inbuf] "m" (*inbuf) ); inbuf += 16; inlen -= 16; } } /* Partial fold. */ if (inlen) { /* Load last input and add padding zeros. */ asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t" "movdqu %[shl_shuf], %%xmm4\n\t" "movdqu %[mask], %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pshufb %%xmm4, %%xmm0\n\t" "movdqu %[inbuf], %%xmm4\n\t" "pshufb %%xmm3, %%xmm1\n\t" "pand %%xmm4, %%xmm2\n\t" "por %%xmm1, %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" : : [inbuf] "m" (*(inbuf - 16 + inlen)), [mask] "m" (crc32_partial_fold_input_mask[inlen]), [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]), [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16]) ); inbuf += inlen; inlen -= inlen; } /* Final fold. 
*/ asm volatile (/* reduce 128-bits to 96-bits */ "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" "psrldq $8, %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* reduce 96-bits to 64-bits */ "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ /* barrett reduction */ "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ "pxor %%xmm1, %%xmm0\n\t" /* store CRC */ "pextrd $2, %%xmm0, %[out]\n\t" : [out] "=m" (*pcrc) : [k5] "m" (consts->k[5 - 1]) ); } -static inline void +static ASM_FUNC_ATTR_INLINE void crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, const struct crc32_consts_s *consts) { if (inlen < 4) { u32 crc = *pcrc; u32 data; asm volatile ("movdqa %[my_p], %%xmm5\n\t" : : [my_p] "m" (consts->my_p[0]) ); if (inlen == 1) { data = inbuf[0]; data ^= crc; data <<= 24; crc >>= 8; } else if (inlen == 2) { data = ((const struct u16_unaligned_s *)inbuf)->a; data ^= crc; data <<= 16; crc >>= 16; } else { data = ((const struct u16_unaligned_s *)inbuf)->a; data |= inbuf[2] << 16; data ^= crc; data <<= 8; crc >>= 24; } /* Barrett reduction */ asm volatile ("movd %[in], %%xmm0\n\t" "movd %[crc], %%xmm1\n\t" "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ "psllq $32, %%xmm1\n\t" "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ "pxor %%xmm1, %%xmm0\n\t" "pextrd $1, %%xmm0, %[out]\n\t" : [out] "=m" (*pcrc) : [in] "rm" (data), [crc] "rm" (crc) ); } else if (inlen == 4) { /* Barrett reduction */ asm volatile ("movd %[crc], %%xmm1\n\t" "movd %[in], %%xmm0\n\t" "movdqa %[my_p], %%xmm5\n\t" "pxor %%xmm1, %%xmm0\n\t" "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ "pextrd $1, %%xmm0, %[out]\n\t" : [out] "=m" (*pcrc) : [in] "m" (*inbuf), [crc] "m" (*pcrc), [my_p] "m" (consts->my_p[0]) ); } else { asm volatile ("movdqu %[shuf], %%xmm4\n\t" "movd %[crc], %%xmm1\n\t" "movdqa %[my_p], %%xmm5\n\t" "movdqa %[k3k4], %%xmm6\n\t" : : [shuf] "m" (crc32_refl_shuf_shift[inlen]), [crc] "m" (*pcrc), [my_p] "m" (consts->my_p[0]), [k3k4] "m" (consts->k[3 - 1]) ); if (inlen >= 8) { asm volatile ("movq %[inbuf], %%xmm0\n\t" : : [inbuf] "m" (*inbuf) ); if (inlen > 8) { asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ "movq %[inbuf_tail], %%xmm2\n\t" "punpcklqdq %%xmm2, %%xmm0\n\t" "pshufb %[merge_shuf], %%xmm0\n\t" : : [inbuf_tail] "m" (inbuf[inlen - 8]), [merge_shuf] "m" (*crc32_merge9to15_shuf[inlen - 9]) ); } } else { asm volatile ("movd %[inbuf], %%xmm0\n\t" "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" "pshufb %[merge_shuf], %%xmm0\n\t" : : [inbuf] "m" (*inbuf), [inbuf_tail] "m" (inbuf[inlen - 4]), [merge_shuf] "m" (*crc32_merge5to7_shuf[inlen - 5]) ); } /* Final fold. 
*/ asm volatile ("pxor %%xmm1, %%xmm0\n\t" "pshufb %%xmm4, %%xmm0\n\t" /* reduce 128-bits to 96-bits */ "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" "psrldq $8, %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */ /* reduce 96-bits to 64-bits */ "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ /* barrett reduction */ "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ "pxor %%xmm1, %%xmm0\n\t" /* store CRC */ "pextrd $2, %%xmm0, %[out]\n\t" : [out] "=m" (*pcrc) : [k5] "m" (consts->k[5 - 1]) ); } } /* PCLMUL functions for non-reflected CRC32. */ -static inline void +static ASM_FUNC_ATTR_INLINE void crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, const struct crc32_consts_s *consts) { asm volatile ("movdqa %[bswap], %%xmm7\n\t" : : [bswap] "m" (*crc32_bswap_shuf) ); if (inlen >= 8 * 16) { asm volatile ("movd %[crc], %%xmm4\n\t" "movdqu %[inbuf_0], %%xmm0\n\t" "movdqu %[inbuf_1], %%xmm1\n\t" "movdqu %[inbuf_2], %%xmm2\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqu %[inbuf_3], %%xmm3\n\t" "pshufb %%xmm7, %%xmm0\n\t" "pshufb %%xmm7, %%xmm1\n\t" "pshufb %%xmm7, %%xmm2\n\t" "pshufb %%xmm7, %%xmm3\n\t" : : [inbuf_0] "m" (inbuf[0 * 16]), [inbuf_1] "m" (inbuf[1 * 16]), [inbuf_2] "m" (inbuf[2 * 16]), [inbuf_3] "m" (inbuf[3 * 16]), [crc] "m" (*pcrc) ); inbuf += 4 * 16; inlen -= 4 * 16; asm volatile ("movdqa %[k1k2], %%xmm4\n\t" : : [k1k2] "m" (consts->k[1 - 1]) ); /* Fold by 4. */ while (inlen >= 4 * 16) { asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" "movdqa %%xmm0, %%xmm6\n\t" "pshufb %%xmm7, %%xmm5\n\t" "pclmulqdq $0x01, %%xmm4, %%xmm0\n\t" "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm6, %%xmm0\n\t" "movdqu %[inbuf_1], %%xmm5\n\t" "movdqa %%xmm1, %%xmm6\n\t" "pshufb %%xmm7, %%xmm5\n\t" "pclmulqdq $0x01, %%xmm4, %%xmm1\n\t" "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm1\n\t" "pxor %%xmm6, %%xmm1\n\t" "movdqu %[inbuf_2], %%xmm5\n\t" "movdqa %%xmm2, %%xmm6\n\t" "pshufb %%xmm7, %%xmm5\n\t" "pclmulqdq $0x01, %%xmm4, %%xmm2\n\t" "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm2\n\t" "pxor %%xmm6, %%xmm2\n\t" "movdqu %[inbuf_3], %%xmm5\n\t" "movdqa %%xmm3, %%xmm6\n\t" "pshufb %%xmm7, %%xmm5\n\t" "pclmulqdq $0x01, %%xmm4, %%xmm3\n\t" "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm6, %%xmm3\n\t" : : [inbuf_0] "m" (inbuf[0 * 16]), [inbuf_1] "m" (inbuf[1 * 16]), [inbuf_2] "m" (inbuf[2 * 16]), [inbuf_3] "m" (inbuf[3 * 16]) ); inbuf += 4 * 16; inlen -= 4 * 16; } asm volatile ("movdqa %[k3k4], %%xmm6\n\t" "movdqa %[my_p], %%xmm5\n\t" : : [k3k4] "m" (consts->k[3 - 1]), [my_p] "m" (consts->my_p[0]) ); /* Fold 4 to 1. 
*/ asm volatile ("movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" "pxor %%xmm1, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqa %%xmm0, %%xmm4\n\t" "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" "pxor %%xmm3, %%xmm0\n\t" "pxor %%xmm4, %%xmm0\n\t" : : ); } else { asm volatile ("movd %[crc], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "movdqa %[k3k4], %%xmm6\n\t" "pxor %%xmm1, %%xmm0\n\t" "movdqa %[my_p], %%xmm5\n\t" "pshufb %%xmm7, %%xmm0\n\t" : : [inbuf] "m" (*inbuf), [crc] "m" (*pcrc), [k3k4] "m" (consts->k[3 - 1]), [my_p] "m" (consts->my_p[0]) ); inbuf += 16; inlen -= 16; } /* Fold by 1. */ if (inlen >= 16) { while (inlen >= 16) { /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ asm volatile ("movdqu %[inbuf], %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" "pshufb %%xmm7, %%xmm2\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" : : [inbuf] "m" (*inbuf) ); inbuf += 16; inlen -= 16; } } /* Partial fold. */ if (inlen) { /* Load last input and add padding zeros. */ asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t" "movdqu %[shr_shuf], %%xmm3\n\t" "movdqu %[mask], %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pshufb %%xmm4, %%xmm0\n\t" "movdqu %[inbuf], %%xmm4\n\t" "pshufb %%xmm3, %%xmm1\n\t" "pand %%xmm4, %%xmm2\n\t" "por %%xmm1, %%xmm2\n\t" "pshufb %%xmm7, %%xmm2\n\t" "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" "pxor %%xmm2, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" : : [inbuf] "m" (*(inbuf - 16 + inlen)), [mask] "m" (crc32_partial_fold_input_mask[inlen]), [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]), [shr_shuf] "m" (crc32_shuf_shift[inlen + 16]) ); inbuf += inlen; inlen -= inlen; } /* Final fold. 
*/ asm volatile (/* reduce 128-bits to 96-bits */ "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" "pslldq $8, %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ /* reduce 96-bits to 64-bits */ "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ /* barrett reduction */ "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" /* store CRC in input endian */ "movd %%xmm0, %%eax\n\t" "bswapl %%eax\n\t" "movl %%eax, %[out]\n\t" : [out] "=m" (*pcrc) : [k5] "m" (consts->k[5 - 1]) : "eax" ); } -static inline void +static ASM_FUNC_ATTR_INLINE void crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, const struct crc32_consts_s *consts) { if (inlen < 4) { u32 crc = *pcrc; u32 data; asm volatile ("movdqa %[my_p], %%xmm5\n\t" : : [my_p] "m" (consts->my_p[0]) ); if (inlen == 1) { data = inbuf[0]; data ^= crc; data = _gcry_bswap32(data << 24); crc = _gcry_bswap32(crc >> 8); } else if (inlen == 2) { data = ((const struct u16_unaligned_s *)inbuf)->a; data ^= crc; data = _gcry_bswap32(data << 16); crc = _gcry_bswap32(crc >> 16); } else { data = ((const struct u16_unaligned_s *)inbuf)->a; data |= inbuf[2] << 16; data ^= crc; data = _gcry_bswap32(data << 8); crc = _gcry_bswap32(crc >> 24); } /* Barrett reduction */ asm volatile ("movd %[in], %%xmm0\n\t" "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */ "movd %[crc], %%xmm1\n\t" "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ "pxor %%xmm1, %%xmm0\n\t" /* store CRC in input endian */ "movd %%xmm0, %%eax\n\t" "bswapl %%eax\n\t" "movl %%eax, %[out]\n\t" : [out] "=m" (*pcrc) : [in] "r" (data), [crc] "r" (crc) : "eax" ); } else if (inlen == 4) { /* Barrett reduction */ asm volatile ("movd %[crc], %%xmm0\n\t" "movd %[in], %%xmm1\n\t" "movdqa %[my_p], %%xmm5\n\t" : : [in] "m" (*inbuf), [crc] "m" (*pcrc), [my_p] "m" (consts->my_p[0]) : "cc" ); asm volatile ("pxor %%xmm1, %%xmm0\n\t" "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ : : [bswap] "m" (*crc32_bswap_shuf) : "cc" ); asm volatile (/* store CRC in input endian */ "movd %%xmm0, %%eax\n\t" "bswapl %%eax\n\t" "movl %%eax, %[out]\n\t" : [out] "=m" (*pcrc) : : "eax", "cc" ); } else { asm volatile ("movdqu %[shuf], %%xmm7\n\t" "movd %[crc], %%xmm1\n\t" "movdqa %[my_p], %%xmm5\n\t" "movdqa %[k3k4], %%xmm6\n\t" : : [shuf] "m" (crc32_shuf_shift[32 - inlen]), [crc] "m" (*pcrc), [my_p] "m" (consts->my_p[0]), [k3k4] "m" (consts->k[3 - 1]) ); if (inlen >= 8) { asm volatile ("movq %[inbuf], %%xmm0\n\t" : : [inbuf] "m" (*inbuf) ); if (inlen > 8) { asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ "movq %[inbuf_tail], %%xmm2\n\t" "punpcklqdq %%xmm2, %%xmm0\n\t" "pshufb %[merge_shuf], %%xmm0\n\t" : : [inbuf_tail] "m" (inbuf[inlen - 8]), [merge_shuf] "m" (*crc32_merge9to15_shuf[inlen - 9]) ); } } else { asm volatile ("movd %[inbuf], %%xmm0\n\t" "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" "pshufb %[merge_shuf], %%xmm0\n\t" : : [inbuf] "m" (*inbuf), [inbuf_tail] "m" (inbuf[inlen - 4]), [merge_shuf] "m" (*crc32_merge5to7_shuf[inlen 
- 5]) ); } /* Final fold. */ asm volatile ("pxor %%xmm1, %%xmm0\n\t" "pshufb %%xmm7, %%xmm0\n\t" /* reduce 128-bits to 96-bits */ "movdqa %%xmm0, %%xmm1\n\t" "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" "pslldq $8, %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ /* reduce 96-bits to 64-bits */ "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ /* barrett reduction */ "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" "pxor %%xmm1, %%xmm0\n\t" /* store CRC in input endian */ "movd %%xmm0, %%eax\n\t" "bswapl %%eax\n\t" "movl %%eax, %[out]\n\t" : [out] "=m" (*pcrc) : [k5] "m" (consts->k[5 - 1]) : "eax" ); } } -void +void ASM_FUNC_ATTR _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) { const struct crc32_consts_s *consts = &crc32_consts; #if defined(__x86_64__) && defined(__WIN64__) char win64tmp[2 * 16]; /* XMM6-XMM7 need to be restored after use. */ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" "movdqu %%xmm7, 1*16(%0)\n\t" : : "r" (win64tmp) : "memory"); #endif if (!inlen) return; if (inlen >= 16) crc32_reflected_bulk(pcrc, inbuf, inlen, consts); else crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts); #if defined(__x86_64__) && defined(__WIN64__) /* Restore used registers. */ asm volatile("movdqu 0*16(%0), %%xmm6\n\t" "movdqu 1*16(%0), %%xmm7\n\t" : : "r" (win64tmp) : "memory"); #endif } -void +void ASM_FUNC_ATTR _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) { const struct crc32_consts_s *consts = &crc24rfc2440_consts; #if defined(__x86_64__) && defined(__WIN64__) char win64tmp[2 * 16]; /* XMM6-XMM7 need to be restored after use. */ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" "movdqu %%xmm7, 1*16(%0)\n\t" : : "r" (win64tmp) : "memory"); #endif if (!inlen) return; /* Note: *pcrc in input endian. */ if (inlen >= 16) crc32_bulk(pcrc, inbuf, inlen, consts); else crc32_less_than_16(pcrc, inbuf, inlen, consts); #if defined(__x86_64__) && defined(__WIN64__) /* Restore used registers. */ asm volatile("movdqu 0*16(%0), %%xmm6\n\t" "movdqu 1*16(%0), %%xmm7\n\t" : : "r" (win64tmp) : "memory"); #endif } #if __clang__ # pragma clang attribute pop #endif #endif /* USE_INTEL_PCLMUL */ diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index a2a62abd..b26449a7 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -1,3886 +1,3891 @@ /* AES-NI accelerated AES for Libgcrypt * Copyright (C) 2000, 2001, 2002, 2003, 2007, * 2008, 2011, 2012 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_AESNI #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif #define ALWAYS_INLINE inline __attribute__((always_inline)) #define NO_INLINE __attribute__((noinline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE +#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE typedef struct u128_s { u32 a, b, c, d; } __attribute__((packed, aligned(1), may_alias)) u128_t; /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l because of 'pragma target'. */ -static ALWAYS_INLINE const unsigned char * +static ASM_FUNC_ATTR_INLINE const unsigned char * aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) { unsigned long ntz; /* Assumes that N != 0. */ asm ("rep;bsfl %k[low], %k[ntz]\n\t" : [ntz] "=r" (ntz) : [low] "r" ((unsigned long)n) : "cc"); return c->u_mode.ocb.L[ntz]; } /* Two macros to be called prior and after the use of AESNI instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the SSE regsiters are cleared and won't reveal any information about the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. */ # define aesni_prepare_2_7_variable char win64tmp[16 * 2] # define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8] # define aesni_prepare() do { } while (0) # define aesni_prepare_2_7() \ do { asm volatile ("movdqu %%xmm6, %0\n\t" \ "movdqu %%xmm7, %1\n\t" \ : "=m" (*win64tmp), "=m" (*(win64tmp+16)) \ : \ : "memory"); \ } while (0) # define aesni_prepare_8_15() \ do { asm volatile ("movdqu %%xmm8, 0*16(%0)\n\t" \ "movdqu %%xmm9, 1*16(%0)\n\t" \ "movdqu %%xmm10, 2*16(%0)\n\t" \ "movdqu %%xmm11, 3*16(%0)\n\t" \ "movdqu %%xmm12, 4*16(%0)\n\t" \ "movdqu %%xmm13, 5*16(%0)\n\t" \ "movdqu %%xmm14, 6*16(%0)\n\t" \ "movdqu %%xmm15, 7*16(%0)\n\t" \ : \ : "r" (win64tmp8_15) \ : "memory"); \ } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) # define aesni_cleanup_2_7() \ do { asm volatile ("movdqu %0, %%xmm6\n\t" \ "movdqu %1, %%xmm7\n\t" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ : \ : "m" (*win64tmp), "m" (*(win64tmp+16)) \ : "memory"); \ } while (0) # define aesni_cleanup_8_15() \ do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t" \ "movdqu 1*16(%0), %%xmm9\n\t" \ "movdqu 2*16(%0), %%xmm10\n\t" \ "movdqu 3*16(%0), %%xmm11\n\t" \ "movdqu 4*16(%0), %%xmm12\n\t" \ "movdqu 5*16(%0), %%xmm13\n\t" \ "movdqu 6*16(%0), %%xmm14\n\t" \ "movdqu 7*16(%0), %%xmm15\n\t" \ : \ : "r" (win64tmp8_15) \ : "memory"); \ } while (0) #else # define aesni_prepare_2_7_variable # define aesni_prepare() do { } while (0) # define aesni_prepare_2_7() do { } while (0) # define aesni_cleanup() \ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ "pxor %%xmm1, %%xmm1\n" :: ); \ } while (0) # define aesni_cleanup_2_7() \ do { asm volatile ("pxor %%xmm7, %%xmm7\n\t" \ "pxor %%xmm2, 
%%xmm2\n\t" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n":: ); \ } while (0) # ifdef __x86_64__ # define aesni_prepare_8_15_variable # define aesni_prepare_8_15() do { } while (0) # define aesni_cleanup_8_15() \ do { asm volatile ("pxor %%xmm8, %%xmm8\n" \ "pxor %%xmm9, %%xmm9\n" \ "pxor %%xmm10, %%xmm10\n" \ "pxor %%xmm11, %%xmm11\n" \ "pxor %%xmm12, %%xmm12\n" \ "pxor %%xmm13, %%xmm13\n" \ "pxor %%xmm14, %%xmm14\n" \ "pxor %%xmm15, %%xmm15\n":: ); \ } while (0) # endif #endif -void +void ASM_FUNC_ATTR _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) { aesni_prepare_2_7_variable; aesni_prepare(); aesni_prepare_2_7(); if (ctx->rounds < 12) { /* 128-bit key */ #define AESKEYGENASSIST_xmm1_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" #define AESKEY_EXPAND128 \ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm1, %%xmm3\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm1\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm1\n\t" \ "pslldq $4, %%xmm3\n\t" \ "pxor %%xmm3, %%xmm2\n\t" \ "pxor %%xmm2, %%xmm1\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x01) AESKEY_EXPAND128 "movdqa %%xmm1, 0x10(%[ksch])\n\t" /* ksch[1] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x02) AESKEY_EXPAND128 "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x04) AESKEY_EXPAND128 "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x08) AESKEY_EXPAND128 "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x10) AESKEY_EXPAND128 "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x20) AESKEY_EXPAND128 "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x40) AESKEY_EXPAND128 "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x80) AESKEY_EXPAND128 "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x1b) AESKEY_EXPAND128 "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x36) AESKEY_EXPAND128 "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm1_xmm2 #undef AESKEY_EXPAND128 } else if (ctx->rounds == 12) { /* 192-bit key */ #define AESKEYGENASSIST_xmm3_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" #define AESKEY_EXPAND192 \ "pshufd $0x55, %%xmm2, %%xmm2\n\t" \ "movdqu %%xmm1, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pxor %%xmm2, %%xmm1\n\t" \ "pshufd $0xff, %%xmm1, %%xmm2\n\t" \ "movdqu %%xmm3, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pxor %%xmm2, %%xmm3\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ "movq 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..23] */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x01) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x10(%[ksch])\n\t" /* ksch[1] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x20(%[ksch])\n\t" /* ksch[2] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x02) AESKEY_EXPAND192 "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] 
:= xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x04) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x40(%[ksch])\n\t" /* ksch[4] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x50(%[ksch])\n\t" /* ksch[5] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x08) AESKEY_EXPAND192 "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x10) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0x70(%[ksch])\n\t" /* ksch[7] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0x80(%[ksch])\n\t" /* ksch[8] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x20) AESKEY_EXPAND192 "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1 */ "movdqa %%xmm3, %%xmm5\n\t" AESKEYGENASSIST_xmm3_xmm2(0x40) AESKEY_EXPAND192 "shufpd $0, %%xmm1, %%xmm5\n\t" "movdqa %%xmm5, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm5 */ "movdqa %%xmm1, %%xmm6\n\t" "shufpd $1, %%xmm3, %%xmm6\n\t" "movdqa %%xmm6, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm6 */ AESKEYGENASSIST_xmm3_xmm2(0x80) AESKEY_EXPAND192 "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm3_xmm2 #undef AESKEY_EXPAND192 } else if (ctx->rounds > 12) { /* 256-bit key */ #define AESKEYGENASSIST_xmm1_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t" #define AESKEYGENASSIST_xmm3_xmm2(imm8) \ ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t" #define AESKEY_EXPAND256_A \ "pshufd $0xff, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm1, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm1\n\t" \ "pxor %%xmm2, %%xmm1\n\t" #define AESKEY_EXPAND256_B \ "pshufd $0xaa, %%xmm2, %%xmm2\n\t" \ "movdqa %%xmm3, %%xmm4\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pslldq $4, %%xmm4\n\t" \ "pxor %%xmm4, %%xmm3\n\t" \ "pxor %%xmm2, %%xmm3\n\t" asm volatile ("movdqu (%[key]), %%xmm1\n\t" /* xmm1 := key[0..15] */ "movdqu 16(%[key]), %%xmm3\n\t" /* xmm3 := key[16..31] */ "movdqa %%xmm1, (%[ksch])\n\t" /* ksch[0] := xmm1 */ "movdqa %%xmm3, 0x10(%[ksch])\n\t" /* ksch[1] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x01) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x30(%[ksch])\n\t" /* ksch[3] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x02) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x50(%[ksch])\n\t" /* ksch[5] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x04) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x70(%[ksch])\n\t" /* ksch[7] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x08) AESKEY_EXPAND256_A "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0x90(%[ksch])\n\t" /* ksch[9] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x10) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1 */ AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x20) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1 */ 
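
A minimal reference sketch (editor's addition, not part of the patch): one round of the AES-128 key expansion performed by the AESKEY_EXPAND128 / AESKEY_EXPAND256_A register sequences above, written with SSE2 intrinsics. The function name is illustrative only; `assist` stands for the AESKEYGENASSIST result for the previous round key and round constant.

#include <emmintrin.h>

static __m128i
aes128_expand_step (__m128i prev, __m128i assist)
{
  __m128i t = _mm_slli_si128 (prev, 4);

  /* Broadcast SubWord(RotWord(w3)) ^ rcon to all four words.  */
  assist = _mm_shuffle_epi32 (assist, 0xff);
  /* prev ^ (prev << 32) ^ (prev << 64) ^ (prev << 96): the prefix-XOR of the
     previous round key words, as done by the three pslldq/pxor pairs above.  */
  prev = _mm_xor_si128 (prev, t);
  t = _mm_slli_si128 (t, 4);
  prev = _mm_xor_si128 (prev, t);
  t = _mm_slli_si128 (t, 4);
  prev = _mm_xor_si128 (prev, t);
  return _mm_xor_si128 (prev, assist);   /* next round key */
}
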
AESKEYGENASSIST_xmm1_xmm2(0x00) AESKEY_EXPAND256_B "movdqa %%xmm3, 0xd0(%[ksch])\n\t" /* ksch[13] := xmm3 */ AESKEYGENASSIST_xmm3_xmm2(0x40) AESKEY_EXPAND256_A "movdqa %%xmm1, 0xe0(%[ksch])\n\t" /* ksch[14] := xmm1 */ : : [key] "r" (key), [ksch] "r" (ctx->keyschenc) : "cc", "memory" ); #undef AESKEYGENASSIST_xmm1_xmm2 #undef AESKEYGENASSIST_xmm3_xmm2 #undef AESKEY_EXPAND256_A #undef AESKEY_EXPAND256_B } aesni_cleanup(); aesni_cleanup_2_7(); } /* Make a decryption key from an encryption key. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_prepare_decryption (RIJNDAEL_context *ctx) { /* The AES-NI decrypt instructions use the Equivalent Inverse Cipher, thus we can't use the the standard decrypt key preparation. */ u128_t *ekey = (u128_t *)ctx->keyschenc; u128_t *dkey = (u128_t *)ctx->keyschdec; int rr; int r; #define DO_AESNI_AESIMC() \ asm volatile ("movdqa %[ekey], %%xmm1\n\t" \ /*"aesimc %%xmm1, %%xmm1\n\t"*/ \ ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" \ "movdqa %%xmm1, %[dkey]" \ : [dkey] "=m" (dkey[r]) \ : [ekey] "m" (ekey[rr]) \ : "memory") dkey[0] = ekey[ctx->rounds]; r=1; rr=ctx->rounds-1; DO_AESNI_AESIMC(); r++; rr--; /* round 1 */ DO_AESNI_AESIMC(); r++; rr--; /* round 2 */ DO_AESNI_AESIMC(); r++; rr--; /* round 3 */ DO_AESNI_AESIMC(); r++; rr--; /* round 4 */ DO_AESNI_AESIMC(); r++; rr--; /* round 5 */ DO_AESNI_AESIMC(); r++; rr--; /* round 6 */ DO_AESNI_AESIMC(); r++; rr--; /* round 7 */ DO_AESNI_AESIMC(); r++; rr--; /* round 8 */ DO_AESNI_AESIMC(); r++; rr--; /* round 9 */ if (ctx->rounds > 10) { DO_AESNI_AESIMC(); r++; rr--; /* round 10 */ DO_AESNI_AESIMC(); r++; rr--; /* round 11 */ if (ctx->rounds > 12) { DO_AESNI_AESIMC(); r++; rr--; /* round 12 */ DO_AESNI_AESIMC(); r++; rr--; /* round 13 */ } } dkey[r] = ekey[0]; #undef DO_AESNI_AESIMC } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx) { aesni_prepare(); do_aesni_prepare_decryption (ctx); aesni_cleanup(); } /* Encrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_enc (const RIJNDAEL_context *ctx) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" asm volatile ("movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 "\n" : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm1_xmm0 #undef aesenclast_xmm1_xmm0 } /* Decrypt one block using the Intel AES-NI instructions. Block is input * and output through SSE register xmm0. 
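
A minimal reference sketch (editor's addition, not part of the patch): what do_aesni_prepare_decryption computes, expressed with AES-NI intrinsics. The AES-NI decrypt instructions use the Equivalent Inverse Cipher, so the middle encryption round keys are passed through AESIMC (InvMixColumns) and taken in reverse order. Function and parameter names are illustrative, not libgcrypt API.

#include <wmmintrin.h>   /* _mm_aesimc_si128, needs -maes */

static void
equiv_inverse_key_schedule (__m128i *dkey, const __m128i *ekey, int nrounds)
{
  int r;

  dkey[0] = ekey[nrounds];                  /* last encryption round key first */
  for (r = 1; r < nrounds; r++)
    dkey[r] = _mm_aesimc_si128 (ekey[nrounds - r]);
  dkey[nrounds] = ekey[0];                  /* first encryption round key last */
}
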
*/ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_dec (const RIJNDAEL_context *ctx) { #define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" #define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t" asm volatile ("movdqa (%[key]), %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesdec_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Ldeclast%=:\n\t" aesdeclast_xmm1_xmm0 "\n" : : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesdec_xmm1_xmm0 #undef aesdeclast_xmm1_xmm0 } /* Encrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_enc_vec4 (const RIJNDAEL_context *ctx) { #define aesenc_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t" #define aesenc_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd0\n\t" #define aesenc_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd8\n\t" #define aesenc_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe0\n\t" #define aesenclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc8\n\t" #define aesenclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd0\n\t" #define aesenclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd8\n\t" #define aesenclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe0\n\t" asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x20(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x30(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x40(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x50(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x60(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x70(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x80(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0x90(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xa0(%[key]), %%xmm0\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xb0(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xc0(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "jz 
.Ldeclast%=\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xd0(%[key]), %%xmm0\n\t" aesenc_xmm0_xmm1 aesenc_xmm0_xmm2 aesenc_xmm0_xmm3 aesenc_xmm0_xmm4 "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" aesenclast_xmm0_xmm1 aesenclast_xmm0_xmm2 aesenclast_xmm0_xmm3 aesenclast_xmm0_xmm4 : /* no output */ : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm0_xmm1 #undef aesenc_xmm0_xmm2 #undef aesenc_xmm0_xmm3 #undef aesenc_xmm0_xmm4 #undef aesenclast_xmm0_xmm1 #undef aesenclast_xmm0_xmm2 #undef aesenclast_xmm0_xmm3 #undef aesenclast_xmm0_xmm4 } /* Decrypt four blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_dec_vec4 (const RIJNDAEL_context *ctx) { #define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t" #define aesdec_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd0\n\t" #define aesdec_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd8\n\t" #define aesdec_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xde, 0xe0\n\t" #define aesdeclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc8\n\t" #define aesdeclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd0\n\t" #define aesdeclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd8\n\t" #define aesdeclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xe0\n\t" asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x20(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x30(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x40(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x50(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x60(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x70(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x80(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0x90(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xa0(%[key]), %%xmm0\n\t" "cmpl $10, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xb0(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xc0(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "jz .Ldeclast%=\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xd0(%[key]), %%xmm0\n\t" aesdec_xmm0_xmm1 aesdec_xmm0_xmm2 aesdec_xmm0_xmm3 aesdec_xmm0_xmm4 "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" aesdeclast_xmm0_xmm1 aesdeclast_xmm0_xmm2 aesdeclast_xmm0_xmm3 aesdeclast_xmm0_xmm4 : /* no output */ : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); #undef aesdec_xmm0_xmm1 #undef aesdec_xmm0_xmm2 #undef aesdec_xmm0_xmm3 #undef aesdec_xmm0_xmm4 #undef aesdeclast_xmm0_xmm1 #undef aesdeclast_xmm0_xmm2 #undef aesdeclast_xmm0_xmm3 #undef aesdeclast_xmm0_xmm4 } #ifdef __x86_64__ /* Encrypt eight blocks using the 
Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_enc_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "jb .Ldeclast%=\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, 
%%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "je .Ldeclast%=\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" "aesenclast %%xmm0, %%xmm1\n\t" "aesenclast %%xmm0, %%xmm2\n\t" "aesenclast %%xmm0, %%xmm3\n\t" "aesenclast %%xmm0, %%xmm4\n\t" "aesenclast %%xmm0, %%xmm8\n\t" "aesenclast %%xmm0, %%xmm9\n\t" "aesenclast %%xmm0, %%xmm10\n\t" "aesenclast %%xmm0, %%xmm11\n\t" : /* no output */ : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); } /* Decrypt eight blocks using the Intel AES-NI instructions. Blocks are input * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11. */ -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void do_aesni_dec_vec8 (const RIJNDAEL_context *ctx) { asm volatile ("movdqa (%[key]), %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" /* xmm1 ^= key[0] */ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm0, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm0, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm0, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm0, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm0, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm0, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm0\n\t" "cmpl $12, %[rounds]\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" 
"aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "jb .Ldeclast%=\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "je .Ldeclast%=\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm0\n" ".Ldeclast%=:\n\t" "aesdeclast %%xmm0, %%xmm1\n\t" "aesdeclast %%xmm0, %%xmm2\n\t" "aesdeclast %%xmm0, %%xmm3\n\t" "aesdeclast %%xmm0, %%xmm4\n\t" "aesdeclast %%xmm0, %%xmm8\n\t" "aesdeclast %%xmm0, %%xmm9\n\t" "aesdeclast %%xmm0, %%xmm10\n\t" "aesdeclast %%xmm0, %%xmm11\n\t" : /* no output */ : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); } #endif /* __x86_64__ */ /* Perform a CTR encryption round using the counter CTR and the input block A. Write the result to the output block B and update CTR. CTR needs to be a 16 byte aligned little-endian value. */ -static void +static ASM_FUNC_ATTR_INLINE void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" asm volatile ("movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm5\n\t" "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ (big endian) */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "cmpl $0xffffffff, 12(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "psubq %%xmm1, %%xmm5\n\t" /* add carry to upper 64bits */ ".Lno_carry%=:\n\t" "pshufb %%xmm6, %%xmm5\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). 
*/ "pxor (%[key]), %%xmm0\n\t" /* xmm1 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ : [dst] "=m" (*b) : [src] "m" (*a), [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [rounds] "g" (ctx->rounds) : "cc", "memory"); #undef aesenc_xmm1_xmm0 #undef aesenclast_xmm1_xmm0 } /* Four blocks at a time variant of do_aesni_ctr. */ -static void +static ASM_FUNC_ATTR_INLINE void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 } }; const void *bige_addb = bige_addb_const; #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" #define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" #define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" #define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" #define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" #define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" #define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" /* Register usage: [key] keyschedule xmm0 CTR-0 xmm1 temp / round key xmm2 CTR-1 xmm3 CTR-2 xmm4 CTR-3 xmm5 copy of *ctr xmm6 endian swapping mask */ asm volatile (/* detect if 8-bit carry handling is needed */ "cmpb $0xfb, 15(%[ctr])\n\t" "ja .Ladd32bit%=\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */ "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */ "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */ "movdqa 3*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(4) */ "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */ "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */ "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */ "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(4) + CTR (xmm0) */ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "jmp .Lstore_ctr%=\n\t" ".Ladd32bit%=:\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */ "movdqa %%xmm0, %%xmm2\n\t" "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ "psubq %%xmm1, 
%%xmm4\n\t" /* xmm4++ */ "movdqa %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne .Lno_carry%=\n\t" "movl 12(%[ctr]), %%esi\n\t" "bswapl %%esi\n\t" "cmpl $0xfffffffc, %%esi\n\t" "jb .Lno_carry%=\n\t" /* no carry */ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffffc */ "cmpl $0xfffffffe, %%esi\n\t" "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */ /* esi == 0xffffffff */ "psubq %%xmm1, %%xmm2\n\t" ".Lcarry_xmm3%=:\n\t" "psubq %%xmm1, %%xmm3\n\t" ".Lcarry_xmm4%=:\n\t" "psubq %%xmm1, %%xmm4\n\t" ".Lcarry_xmm5%=:\n\t" "psubq %%xmm1, %%xmm5\n\t" ".Lno_carry%=:\n\t" "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */ ".Lstore_ctr%=:\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ : : [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [addb] "r" (bige_addb) : "%esi", "cc", "memory"); asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x20(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x30(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x40(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x50(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x60(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x70(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x80(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0x90(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xa0(%[key]), %%xmm1\n\t" "cmpl $10, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xb0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xc0(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "jz .Lenclast%=\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xd0(%[key]), %%xmm1\n\t" aesenc_xmm1_xmm0 aesenc_xmm1_xmm2 aesenc_xmm1_xmm3 aesenc_xmm1_xmm4 "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" aesenclast_xmm1_xmm0 aesenclast_xmm1_xmm2 aesenclast_xmm1_xmm3 aesenclast_xmm1_xmm4 : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("movdqu (%[src]), %%xmm1\n\t" /* Get block 1. */ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ "movdqu %%xmm0, (%[dst])\n\t" /* Store block 1 */ "movdqu 16(%[src]), %%xmm1\n\t" /* Get block 2. */ "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ "movdqu %%xmm2, 16(%[dst])\n\t" /* Store block 2. */ "movdqu 32(%[src]), %%xmm1\n\t" /* Get block 3. */ "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ "movdqu %%xmm3, 32(%[dst])\n\t" /* Store block 3. 
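
A minimal reference sketch (editor's addition, not part of the patch): the counter handling used by do_aesni_ctr / do_aesni_ctr_4 / do_aesni_ctr_8, in plain C. The byte-wise fast path (paddb) is valid only when adding the block count cannot carry out of the counter's last byte, hence the `cmpb $0xfb` / `cmpb $0xf7` tests; otherwise the counter is treated as a 128-bit big-endian value with explicit 64-bit carry. The helpers below are illustrative, not libgcrypt API.

#include <stdint.h>

static uint64_t load_be64 (const unsigned char *p)
{
  uint64_t v = 0; int i;
  for (i = 0; i < 8; i++) v = (v << 8) | p[i];
  return v;
}

static void store_be64 (unsigned char *p, uint64_t v)
{
  int i;
  for (i = 7; i >= 0; i--) { p[i] = (unsigned char)(v & 0xff); v >>= 8; }
}

/* Fast-path test: adding 1..n cannot overflow the last counter byte.
   n == 4 gives the 0xfb threshold, n == 8 gives 0xf7.  */
static int ctr_byte_add_ok (const unsigned char *ctr, unsigned int n)
{
  return ctr[15] <= 0xff - n;
}

/* Slow path: add n to the 128-bit big-endian counter.  */
static void ctr_add (unsigned char *ctr, uint64_t n)
{
  uint64_t hi = load_be64 (ctr);
  uint64_t lo = load_be64 (ctr + 8);

  lo += n;
  if (lo < n)          /* carry out of the low 64 bits */
    hi++;
  store_be64 (ctr, hi);
  store_be64 (ctr + 8, lo);
}
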
*/ "movdqu 48(%[src]), %%xmm1\n\t" /* Get block 4. */ "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ "movdqu %%xmm4, 48(%[dst])" /* Store block 4. */ : : [src] "r" (a), [dst] "r" (b) : "memory"); #undef aesenc_xmm1_xmm0 #undef aesenc_xmm1_xmm2 #undef aesenc_xmm1_xmm3 #undef aesenc_xmm1_xmm4 #undef aesenclast_xmm1_xmm0 #undef aesenclast_xmm1_xmm2 #undef aesenclast_xmm1_xmm3 #undef aesenclast_xmm1_xmm4 } #ifdef __x86_64__ /* Eight blocks at a time variant of do_aesni_ctr. */ -static void +static ASM_FUNC_ATTR_INLINE void do_aesni_ctr_8 (const RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *b, const unsigned char *a) { static const byte bige_addb_const[8][16] __attribute__ ((aligned (16))) = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 } }; const void *bige_addb = bige_addb_const; /* Register usage: [key] keyschedule xmm0 CTR-0 xmm1 temp / round key xmm2 CTR-1 xmm3 CTR-2 xmm4 CTR-3 xmm5 copy of *ctr xmm6 endian swapping mask xmm8 CTR-4 xmm9 CTR-5 xmm10 CTR-6 xmm11 CTR-7 xmm12 temp xmm13 temp xmm14 temp xmm15 temp */ asm volatile (/* detect if 8-bit carry handling is needed */ "cmpb $0xf7, 15(%[ctr])\n\t" "ja .Ladd32bit%=\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0 := CTR (xmm5) */ "movdqa 0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) */ "movdqa 1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) */ "movdqa 2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) */ "movdqa 3*16(%[addb]), %%xmm8\n\t" /* xmm8 := be(4) */ "movdqa 4*16(%[addb]), %%xmm9\n\t" /* xmm9 := be(5) */ "movdqa 5*16(%[addb]), %%xmm10\n\t" /* xmm10 := be(6) */ "movdqa 6*16(%[addb]), %%xmm11\n\t" /* xmm11 := be(7) */ "movdqa 7*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(8) */ "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "paddb %%xmm0, %%xmm2\n\t" /* xmm2 := be(1) + CTR (xmm0) */ "paddb %%xmm0, %%xmm3\n\t" /* xmm3 := be(2) + CTR (xmm0) */ "paddb %%xmm0, %%xmm4\n\t" /* xmm4 := be(3) + CTR (xmm0) */ "paddb %%xmm0, %%xmm8\n\t" /* xmm8 := be(4) + CTR (xmm0) */ "paddb %%xmm0, %%xmm9\n\t" /* xmm9 := be(5) + CTR (xmm0) */ "paddb %%xmm0, %%xmm10\n\t" /* xmm10 := be(6) + CTR (xmm0) */ "paddb %%xmm0, %%xmm11\n\t" /* xmm11 := be(7) + CTR (xmm0) */ "paddb %%xmm0, %%xmm5\n\t" /* xmm5 := be(8) + CTR (xmm0) */ "jmp .Lstore_ctr%=\n\t" ".Ladd32bit%=:\n\t" "movdqa %%xmm5, %%xmm0\n\t" /* xmm0, xmm2 := CTR (xmm5) */ "movdqa %%xmm0, %%xmm2\n\t" "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := le(xmm2) */ "psubq %%xmm1, %%xmm2\n\t" /* xmm2++ */ "movdqa %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ "psubq %%xmm1, %%xmm3\n\t" /* xmm3++ */ "movdqa %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ "psubq %%xmm1, %%xmm4\n\t" /* xmm4++ */ "movdqa %%xmm4, %%xmm8\n\t" /* xmm8 := xmm4 */ "psubq %%xmm1, %%xmm8\n\t" /* xmm8++ */ "movdqa %%xmm8, %%xmm9\n\t" /* xmm9 := xmm8 */ "psubq %%xmm1, %%xmm9\n\t" /* xmm9++ */ "movdqa %%xmm9, %%xmm10\n\t" /* xmm10 := xmm9 */ "psubq %%xmm1, %%xmm10\n\t" /* xmm10++ */ "movdqa %%xmm10, %%xmm11\n\t" /* xmm11 := xmm10 */ "psubq %%xmm1, %%xmm11\n\t" /* xmm11++ */ "movdqa %%xmm11, %%xmm5\n\t" /* xmm5 := xmm11 */ "psubq %%xmm1, %%xmm5\n\t" /* xmm5++ */ /* detect if 64-bit carry handling is needed */ "cmpl $0xffffffff, 8(%[ctr])\n\t" "jne 
.Lno_carry%=\n\t" "movl 12(%[ctr]), %%esi\n\t" "bswapl %%esi\n\t" "cmpl $0xfffffff8, %%esi\n\t" "jb .Lno_carry%=\n\t" /* no carry */ "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "je .Lcarry_xmm5%=\n\t" /* esi == 0xfffffff8 */ "cmpl $0xfffffffa, %%esi\n\t" "jb .Lcarry_xmm11%=\n\t" /* esi == 0xfffffff9 */ "je .Lcarry_xmm10%=\n\t" /* esi == 0xfffffffa */ "cmpl $0xfffffffc, %%esi\n\t" "jb .Lcarry_xmm9%=\n\t" /* esi == 0xfffffffb */ "je .Lcarry_xmm8%=\n\t" /* esi == 0xfffffffc */ "cmpl $0xfffffffe, %%esi\n\t" "jb .Lcarry_xmm4%=\n\t" /* esi == 0xfffffffd */ "je .Lcarry_xmm3%=\n\t" /* esi == 0xfffffffe */ /* esi == 0xffffffff */ "psubq %%xmm1, %%xmm2\n\t" ".Lcarry_xmm3%=:\n\t" "psubq %%xmm1, %%xmm3\n\t" ".Lcarry_xmm4%=:\n\t" "psubq %%xmm1, %%xmm4\n\t" ".Lcarry_xmm8%=:\n\t" "psubq %%xmm1, %%xmm8\n\t" ".Lcarry_xmm9%=:\n\t" "psubq %%xmm1, %%xmm9\n\t" ".Lcarry_xmm10%=:\n\t" "psubq %%xmm1, %%xmm10\n\t" ".Lcarry_xmm11%=:\n\t" "psubq %%xmm1, %%xmm11\n\t" ".Lcarry_xmm5%=:\n\t" "psubq %%xmm1, %%xmm5\n\t" ".Lno_carry%=:\n\t" "movdqa (%[key]), %%xmm1\n\t" /* xmm1 := key[0] */ "pshufb %%xmm6, %%xmm2\n\t" /* xmm2 := be(xmm2) */ "pshufb %%xmm6, %%xmm3\n\t" /* xmm3 := be(xmm3) */ "pshufb %%xmm6, %%xmm4\n\t" /* xmm4 := be(xmm4) */ "pshufb %%xmm6, %%xmm5\n\t" /* xmm5 := be(xmm5) */ "pshufb %%xmm6, %%xmm8\n\t" /* xmm8 := be(xmm8) */ "pshufb %%xmm6, %%xmm9\n\t" /* xmm9 := be(xmm9) */ "pshufb %%xmm6, %%xmm10\n\t" /* xmm10 := be(xmm10) */ "pshufb %%xmm6, %%xmm11\n\t" /* xmm11 := be(xmm11) */ ".Lstore_ctr%=:\n\t" "movdqa %%xmm5, (%[ctr])\n\t" /* Update CTR (mem). */ : : [ctr] "r" (ctr), [key] "r" (ctx->keyschenc), [addb] "r" (bige_addb) : "%esi", "cc", "memory"); asm volatile ("pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ "pxor %%xmm1, %%xmm8\n\t" /* xmm8 ^= key[0] */ "pxor %%xmm1, %%xmm9\n\t" /* xmm9 ^= key[0] */ "pxor %%xmm1, %%xmm10\n\t" /* xmm10 ^= key[0] */ "pxor %%xmm1, %%xmm11\n\t" /* xmm11 ^= key[0] */ "movdqa 0x10(%[key]), %%xmm1\n\t" "cmpl $12, %[rounds]\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc 
%%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xa0(%[key]), %%xmm1\n\t" "jb .Lenclast%=\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xc0(%[key]), %%xmm1\n\t" "je .Lenclast%=\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm1\n\t" "aesenc %%xmm1, %%xmm0\n\t" "aesenc %%xmm1, %%xmm2\n\t" "aesenc %%xmm1, %%xmm3\n\t" "aesenc %%xmm1, %%xmm4\n\t" "aesenc %%xmm1, %%xmm8\n\t" "aesenc %%xmm1, %%xmm9\n\t" "aesenc %%xmm1, %%xmm10\n\t" "aesenc %%xmm1, %%xmm11\n\t" "movdqa 0xe0(%[key]), %%xmm1\n" ".Lenclast%=:\n\t" : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("movdqu 0*16(%[src]), %%xmm12\n\t" /* Get block 1. */ "movdqu 1*16(%[src]), %%xmm13\n\t" /* Get block 2. */ "movdqu 2*16(%[src]), %%xmm14\n\t" /* Get block 3. */ "movdqu 3*16(%[src]), %%xmm15\n\t" /* Get block 4. */ "movdqu 4*16(%[src]), %%xmm7\n\t" /* Get block 5. */ "pxor %%xmm1, %%xmm12\n\t" /* block1 ^= lastkey */ "aesenclast %%xmm12, %%xmm0\n\t" "movdqu 5*16(%[src]), %%xmm12\n\t" /* Get block 6. */ "pxor %%xmm1, %%xmm13\n\t" /* block2 ^= lastkey */ "aesenclast %%xmm13, %%xmm2\n\t" "movdqu 6*16(%[src]), %%xmm13\n\t" /* Get block 7. */ "pxor %%xmm1, %%xmm14\n\t" /* block3 ^= lastkey */ "aesenclast %%xmm14, %%xmm3\n\t" "movdqu 7*16(%[src]), %%xmm14\n\t" /* Get block 8. */ "pxor %%xmm1, %%xmm15\n\t" /* block4 ^= lastkey */ "aesenclast %%xmm15, %%xmm4\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */ "pxor %%xmm1, %%xmm7\n\t" /* block5 ^= lastkey */ "aesenclast %%xmm7, %%xmm8\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" /* Store block 1 */ "pxor %%xmm1, %%xmm12\n\t" /* block6 ^= lastkey */ "aesenclast %%xmm12, %%xmm9\n\t" "movdqu %%xmm2, 1*16(%[dst])\n\t" /* Store block 2. */ "pxor %%xmm1, %%xmm13\n\t" /* block7 ^= lastkey */ "aesenclast %%xmm13, %%xmm10\n\t" "movdqu %%xmm3, 2*16(%[dst])\n\t" /* Store block 3. */ "pxor %%xmm1, %%xmm14\n\t" /* block8 ^= lastkey */ "aesenclast %%xmm14, %%xmm11\n\t" "movdqu %%xmm4, 3*16(%[dst])\n\t" /* Store block 4. */ "movdqu %%xmm8, 4*16(%[dst])\n\t" /* Store block 8. 
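
Editor's note (not part of the patch): the interleaved tail above folds the CTR keystream XOR into the final AES round. Because aesenclast(s, k) equals ShiftRows(SubBytes(s)) ^ k, XORing the plaintext block into the last round key before aesenclast yields the ciphertext directly and saves one pxor per block. A small illustrative sketch with intrinsics; both functions return the same value:

#include <emmintrin.h>
#include <wmmintrin.h>

static __m128i ctr_last_round_separate (__m128i state, __m128i k_last, __m128i plain)
{
  return _mm_xor_si128 (_mm_aesenclast_si128 (state, k_last), plain);
}

static __m128i ctr_last_round_merged (__m128i state, __m128i k_last, __m128i plain)
{
  return _mm_aesenclast_si128 (state, _mm_xor_si128 (k_last, plain));
}
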
*/ "movdqu %%xmm9, 5*16(%[dst])\n\t" /* Store block 9. */ "movdqu %%xmm10, 6*16(%[dst])\n\t" /* Store block 10. */ "movdqu %%xmm11, 7*16(%[dst])\n\t" /* Store block 11. */ : : [src] "r" (a), [dst] "r" (b) : "memory"); } #endif /* __x86_64__ */ -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { aesni_prepare (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_aesni_enc (ctx); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); aesni_cleanup (); return 0; } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare (); asm volatile ("movdqu %[iv], %%xmm0\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { do_aesni_enc (ctx); asm volatile ("movdqu %[inbuf], %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm5\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" : /* No output */ : [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("movdqa %%xmm0, %%xmm5\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7(); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ : /* No output */ : [mask] "m" (*be_mask), [ctr] "m" (*ctr) : "memory"); #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_8_15_variable; aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { do_aesni_ctr_8 (ctx, ctr, outbuf, inbuf); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { do_aesni_ctr (ctx, ctr, outbuf, inbuf); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } aesni_cleanup (); aesni_cleanup_2_7 (); } -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { aesni_prepare (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_aesni_dec (ctx); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); aesni_cleanup (); return 0; } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned 
char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7(); asm volatile ("movdqu %[iv], %%xmm6\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); /* CFB decryption can be parallelized */ #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_8_15_variable; aesni_prepare_8_15(); for ( ;nblocks >= 8; nblocks -= 8) { asm volatile ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */ "movdqu 0*16(%[inbuf]), %%xmm2\n\t" "movdqu 1*16(%[inbuf]), %%xmm3\n\t" "movdqu 2*16(%[inbuf]), %%xmm4\n\t" "movdqu 3*16(%[inbuf]), %%xmm8\n\t" "movdqu 4*16(%[inbuf]), %%xmm9\n\t" "movdqu 5*16(%[inbuf]), %%xmm10\n\t" "movdqu 6*16(%[inbuf]), %%xmm11\n\t" "movdqu 7*16(%[inbuf]), %%xmm6\n\t" /* update IV */ "movdqa %%xmm2, %%xmm12\n\t" "movdqa %%xmm3, %%xmm13\n\t" "movdqa %%xmm4, %%xmm14\n\t" "movdqa %%xmm8, %%xmm15\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_enc_vec8 (ctx); asm volatile ( "pxor %%xmm12, %%xmm1\n\t" "movdqu 4*16(%[inbuf]), %%xmm12\n\t" "pxor %%xmm13, %%xmm2\n\t" "movdqu 5*16(%[inbuf]), %%xmm13\n\t" "pxor %%xmm14, %%xmm3\n\t" "movdqu 6*16(%[inbuf]), %%xmm14\n\t" "pxor %%xmm15, %%xmm4\n\t" "movdqu 7*16(%[inbuf]), %%xmm15\n\t" "pxor %%xmm12, %%xmm8\n\t" "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm13, %%xmm9\n\t" "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm14, %%xmm10\n\t" "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "pxor %%xmm15, %%xmm11\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" "movdqu %%xmm8, 4*16(%[outbuf])\n\t" "movdqu %%xmm9, 5*16(%[outbuf])\n\t" "movdqu %%xmm10, 6*16(%[outbuf])\n\t" "movdqu %%xmm11, 7*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4; nblocks -= 4) { asm volatile ("movdqu %%xmm6, %%xmm1\n\t" /* load input blocks */ "movdqu 0*16(%[inbuf]), %%xmm2\n\t" "movdqu 1*16(%[inbuf]), %%xmm3\n\t" "movdqu 2*16(%[inbuf]), %%xmm4\n\t" "movdqu 3*16(%[inbuf]), %%xmm6\n\t" /* update IV */ : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_enc_vec4 (ctx); asm volatile ("movdqu 0*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "movdqu 1*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "movdqu 2*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "movdqu 3*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } asm volatile ("movdqu %%xmm6, %%xmm0\n\t" ::: "cc"); for ( ;nblocks; nblocks-- ) { do_aesni_enc (ctx); asm volatile ("movdqa %%xmm0, %%xmm6\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm6, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7(); if ( !ctx->decryption_prepared ) { do_aesni_prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } asm volatile ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */ : /* No output 
*/ : [iv] "m" (*iv) : "memory"); #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_8_15_variable; aesni_prepare_8_15(); for ( ;nblocks >= 8 ; nblocks -= 8 ) { asm volatile ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */ "movdqu 1*16(%[inbuf]), %%xmm2\n\t" "movdqu 2*16(%[inbuf]), %%xmm3\n\t" "movdqu 3*16(%[inbuf]), %%xmm4\n\t" "movdqu 4*16(%[inbuf]), %%xmm8\n\t" "movdqu 5*16(%[inbuf]), %%xmm9\n\t" "movdqu 6*16(%[inbuf]), %%xmm10\n\t" "movdqu 7*16(%[inbuf]), %%xmm11\n\t" "movdqa %%xmm1, %%xmm12\n\t" "movdqa %%xmm2, %%xmm13\n\t" "movdqa %%xmm3, %%xmm14\n\t" "movdqa %%xmm4, %%xmm15\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_dec_vec8 (ctx); asm volatile ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */ "pxor %%xmm12, %%xmm2\n\t" /* xor IV with output */ "movdqu 4*16(%[inbuf]), %%xmm12\n\t" "pxor %%xmm13, %%xmm3\n\t" /* xor IV with output */ "movdqu 5*16(%[inbuf]), %%xmm13\n\t" "pxor %%xmm14, %%xmm4\n\t" /* xor IV with output */ "movdqu 6*16(%[inbuf]), %%xmm14\n\t" "pxor %%xmm15, %%xmm8\n\t" /* xor IV with output */ "movdqu 7*16(%[inbuf]), %%xmm5\n\t" "pxor %%xmm12, %%xmm9\n\t" /* xor IV with output */ "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm13, %%xmm10\n\t" /* xor IV with output */ "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm14, %%xmm11\n\t" /* xor IV with output */ "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "movdqu %%xmm4, 3*16(%[outbuf])\n\t" "movdqu %%xmm8, 4*16(%[outbuf])\n\t" "movdqu %%xmm9, 5*16(%[outbuf])\n\t" "movdqu %%xmm10, 6*16(%[outbuf])\n\t" "movdqu %%xmm11, 7*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { asm volatile ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */ "movdqu 1*16(%[inbuf]), %%xmm2\n\t" "movdqu 2*16(%[inbuf]), %%xmm3\n\t" "movdqu 3*16(%[inbuf]), %%xmm4\n\t" : /* No output */ : [inbuf] "r" (inbuf) : "memory"); do_aesni_dec_vec4 (ctx); asm volatile ("pxor %%xmm5, %%xmm1\n\t" /* xor IV with output */ "movdqu 0*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm1, 0*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm2\n\t" /* xor IV with output */ "movdqu 1*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm2, 1*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm3\n\t" /* xor IV with output */ "movdqu 2*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm3, 2*16(%[outbuf])\n\t" "pxor %%xmm5, %%xmm4\n\t" /* xor IV with output */ "movdqu 3*16(%[inbuf]), %%xmm5\n\t" /* load new IV */ "movdqu %%xmm4, 3*16(%[outbuf])\n\t" : /* No output */ : [inbuf] "r" (inbuf), [outbuf] "r" (outbuf) : "memory"); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "movdqa %%xmm0, %%xmm2\n\t" /* use xmm2 as savebuf */ : /* No output */ : [inbuf] "m" (*inbuf) : "memory"); /* uses only xmm0 and xmm1 */ do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" /* xor IV with output */ "movdqu %%xmm0, %[outbuf]\n\t" "movdqu %%xmm2, %%xmm5\n\t" /* store savebuf as new IV */ : [outbuf] "=m" (*outbuf) : : "memory"); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[iv]\n\t" /* store IV */ : /* No output */ : [iv] "m" (*iv) : "memory"); aesni_cleanup (); aesni_cleanup_2_7 (); } -static ALWAYS_INLINE void +static ASM_FUNC_ATTR_INLINE void aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; /* Calculate checksum 
*/ asm volatile ("movdqu %[checksum], %%xmm6\n\t" "pxor %%xmm1, %%xmm1\n\t" "pxor %%xmm2, %%xmm2\n\t" "pxor %%xmm3, %%xmm3\n\t" : :[checksum] "m" (*c->u_ctr.ctr) : "memory" ); if (0) {} #if defined(HAVE_GCC_INLINE_ASM_AVX2) else if (nblocks >= 16 && ctx->use_avx2) { /* Use wider 256-bit registers for fast xoring of plaintext. */ asm volatile ("vzeroupper\n\t" "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" "vpxor %%xmm5, %%xmm5, %%xmm5\n\t" "vpxor %%xmm7, %%xmm7, %%xmm7\n\t" : : : "memory"); for (;nblocks >= 16; nblocks -= 16) { asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t" "vpxor %[ptr1], %%ymm1, %%ymm1\n\t" "vpxor %[ptr2], %%ymm2, %%ymm2\n\t" "vpxor %[ptr3], %%ymm3, %%ymm3\n\t" "vpxor %[ptr4], %%ymm0, %%ymm0\n\t" "vpxor %[ptr5], %%ymm4, %%ymm4\n\t" "vpxor %[ptr6], %%ymm5, %%ymm5\n\t" "vpxor %[ptr7], %%ymm7, %%ymm7\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) : "memory" ); plaintext += BLOCKSIZE * 16; } asm volatile ("vpxor %%ymm0, %%ymm6, %%ymm6\n\t" "vpxor %%ymm4, %%ymm1, %%ymm1\n\t" "vpxor %%ymm5, %%ymm2, %%ymm2\n\t" "vpxor %%ymm7, %%ymm3, %%ymm3\n\t" "vextracti128 $1, %%ymm6, %%xmm0\n\t" "vextracti128 $1, %%ymm1, %%xmm4\n\t" "vextracti128 $1, %%ymm2, %%xmm5\n\t" "vextracti128 $1, %%ymm3, %%xmm7\n\t" "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" "vpxor %%xmm4, %%xmm1, %%xmm1\n\t" "vpxor %%xmm5, %%xmm2, %%xmm2\n\t" "vpxor %%xmm7, %%xmm3, %%xmm3\n\t" "vzeroupper\n\t" : : : "memory" ); } #endif #if defined(HAVE_GCC_INLINE_ASM_AVX) else if (nblocks >= 16 && ctx->use_avx) { /* Same as AVX2, except using 256-bit floating point instructions. 
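
A minimal reference sketch (editor's addition, not part of the patch): what aesni_ocb_checksum accumulates. The OCB checksum is simply the XOR of all plaintext blocks into c->u_ctr.ctr; the SSE, AVX2 and AVX paths above only compute that XOR in wide registers. The function name is illustrative.

#include <stddef.h>

static void
ocb_checksum_ref (unsigned char checksum[16],
                  const unsigned char *plaintext, size_t nblocks)
{
  size_t n, i;

  for (n = 0; n < nblocks; n++)
    for (i = 0; i < 16; i++)
      checksum[i] ^= plaintext[n * 16 + i];
}
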
*/ asm volatile ("vzeroupper\n\t" "vxorpd %%xmm0, %%xmm0, %%xmm0\n\t" "vxorpd %%xmm4, %%xmm4, %%xmm4\n\t" "vxorpd %%xmm5, %%xmm5, %%xmm5\n\t" "vxorpd %%xmm7, %%xmm7, %%xmm7\n\t" : : : "memory"); for (;nblocks >= 16; nblocks -= 16) { asm volatile ("vxorpd %[ptr0], %%ymm6, %%ymm6\n\t" "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t" "vxorpd %[ptr2], %%ymm2, %%ymm2\n\t" "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t" "vxorpd %[ptr4], %%ymm0, %%ymm0\n\t" "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t" "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t" "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) : "memory" ); plaintext += BLOCKSIZE * 16; } asm volatile ("vxorpd %%ymm0, %%ymm6, %%ymm6\n\t" "vxorpd %%ymm4, %%ymm1, %%ymm1\n\t" "vxorpd %%ymm5, %%ymm2, %%ymm2\n\t" "vxorpd %%ymm7, %%ymm3, %%ymm3\n\t" "vextractf128 $1, %%ymm6, %%xmm0\n\t" "vextractf128 $1, %%ymm1, %%xmm4\n\t" "vextractf128 $1, %%ymm2, %%xmm5\n\t" "vextractf128 $1, %%ymm3, %%xmm7\n\t" "vxorpd %%xmm0, %%xmm6, %%xmm6\n\t" "vxorpd %%xmm4, %%xmm1, %%xmm1\n\t" "vxorpd %%xmm5, %%xmm2, %%xmm2\n\t" "vxorpd %%xmm7, %%xmm3, %%xmm3\n\t" "vzeroupper\n\t" : : : "memory" ); } #endif for (;nblocks >= 4; nblocks -= 4) { asm volatile ("movdqu %[ptr0], %%xmm0\n\t" "movdqu %[ptr1], %%xmm4\n\t" "movdqu %[ptr2], %%xmm5\n\t" "movdqu %[ptr3], %%xmm7\n\t" "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm4, %%xmm1\n\t" "pxor %%xmm5, %%xmm2\n\t" "pxor %%xmm7, %%xmm3\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)), [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)), [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)), [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE)) : "memory" ); plaintext += BLOCKSIZE * 4; } for (;nblocks >= 1; nblocks -= 1) { asm volatile ("movdqu %[ptr0], %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)) : "memory" ); plaintext += BLOCKSIZE; } asm volatile ("pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm3, %%xmm6\n\t" "movdqu %%xmm6, %[checksum]\n\t" : [checksum] "=m" (*c->u_ctr.ctr) : : "memory" ); } -static unsigned int NO_INLINE +static unsigned int ASM_FUNC_ATTR_NOINLINE aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; byte tmpbuf_store[3 * 16 + 15]; byte *tmpbuf; aesni_prepare_2_7_variable; asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory"); tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15); aesni_prepare (); aesni_prepare_2_7 (); /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm7\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv), [ctr] "m" (*c->u_ctr.ctr) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Checksum_i = Checksum_{i-1} xor P_i */ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm0, %%xmm7\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu 
%%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { unsigned char last_xor_first_key_store[16 + 15]; unsigned char *lxf_key; aesni_prepare_8_15_variable; asm volatile ("" : "=r" (lxf_key) : "0" (last_xor_first_key_store) : "memory"); lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15); aesni_prepare_8_15(); asm volatile ("movdqu %[l0], %%xmm6\n\t" "movdqa %[last_key], %%xmm0\n\t" "pxor %[first_key], %%xmm5\n\t" "pxor %[first_key], %%xmm0\n\t" "movdqa %%xmm0, %[lxfkey]\n\t" : [lxfkey] "=m" (*lxf_key) : [l0] "m" (*c->u_mode.ocb.L[0]), [last_key] "m" (ctx->keyschenc[ctx->rounds][0][0]), [first_key] "m" (ctx->keyschenc[0][0][0]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[l0l1], %%xmm10\n\t" "movdqu %[l1], %%xmm11\n\t" "movdqu %[l3], %%xmm15\n\t" : : [l0l1] "m" (*c->u_mode.ocb.L0L1), [l1] "m" (*c->u_mode.ocb.L[1]), [l3] "m" (*l) : "memory" ); n += 4; l = aes_ocb_get_l(c, n); /* Checksum_i = Checksum_{i-1} xor P_i */ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i) */ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t" "movdqu %[inbuf1], %%xmm2\n\t" "movdqu %[inbuf2], %%xmm3\n\t" : : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf3], %%xmm4\n\t" "movdqu %[inbuf4], %%xmm8\n\t" "movdqu %[inbuf5], %%xmm9\n\t" : : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqa %[lxfkey], %%xmm0\n\t" "movdqa %%xmm6, %%xmm12\n\t" "pxor %%xmm5, %%xmm12\n\t" "pxor %%xmm1, %%xmm7\n\t" "pxor %%xmm12, %%xmm1\n\t" "pxor %%xmm0, %%xmm12\n\t" "movdqa %%xmm10, %%xmm13\n\t" "pxor %%xmm5, %%xmm13\n\t" "pxor %%xmm2, %%xmm7\n\t" "pxor %%xmm13, %%xmm2\n\t" "pxor %%xmm0, %%xmm13\n\t" "movdqa %%xmm11, %%xmm14\n\t" "pxor %%xmm5, %%xmm14\n\t" "pxor %%xmm3, %%xmm7\n\t" "pxor %%xmm14, %%xmm3\n\t" "pxor %%xmm0, %%xmm14\n\t" "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm15, %%xmm5\n\t" "pxor %%xmm4, %%xmm7\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" "pxor %%xmm0, %%xmm15\n\t" "movdqa %%xmm5, %%xmm0\n\t" "pxor %%xmm6, %%xmm0\n\t" "pxor %%xmm8, %%xmm7\n\t" "pxor %%xmm0, %%xmm8\n\t" "pxor %[lxfkey], %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" "movdqa %%xmm10, %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm9, %%xmm7\n\t" "pxor %%xmm0, %%xmm9\n\t" "pxor %[lxfkey], %%xmm0\n" "movdqa %%xmm0, %[tmpbuf1]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)) : [lxfkey] "m" (*lxf_key) : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" "movdqa %%xmm11, %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm10, %%xmm7\n\t" "pxor %%xmm0, %%xmm10\n\t" "pxor %[lxfkey], %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)), [lxfkey] "m" (*lxf_key) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm0, %%xmm5\n\t" "movdqa 0x10(%[key]), %%xmm0\n\t" "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm11, %%xmm7\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)), [key] "r" (ctx->keyschenc) : "memory" ); asm volatile ("cmpl $12, %[rounds]\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, 
%%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x80(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "jb .Ldeclast%=\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "je .Ldeclast%=\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesenc %%xmm0, %%xmm1\n\t" "aesenc %%xmm0, %%xmm2\n\t" "aesenc %%xmm0, %%xmm3\n\t" "aesenc %%xmm0, %%xmm4\n\t" "aesenc %%xmm0, %%xmm8\n\t" "aesenc %%xmm0, %%xmm9\n\t" "aesenc %%xmm0, %%xmm10\n\t" "aesenc %%xmm0, %%xmm11\n\t" ".Ldeclast%=:\n\t" : : [key] "r" (ctx->keyschenc), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("aesenclast %%xmm12, %%xmm1\n\t" "aesenclast %%xmm13, %%xmm2\n\t" "aesenclast %%xmm14, %%xmm3\n\t" "aesenclast %%xmm15, %%xmm4\n\t" "aesenclast 
%[tmpbuf0],%%xmm8\n\t" "aesenclast %[tmpbuf1],%%xmm9\n\t" "aesenclast %[tmpbuf2],%%xmm10\n\t" "aesenclast %%xmm5, %%xmm11\n\t" "pxor %[lxfkey], %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)), [lxfkey] "m" (*lxf_key) : "memory" ); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } asm volatile ("pxor %[first_key], %%xmm5\n\t" "pxor %%xmm0, %%xmm0\n\t" "movdqu %%xmm0, %[lxfkey]\n\t" : [lxfkey] "=m" (*lxf_key) : [first_key] "m" (ctx->keyschenc[0][0][0]) : "memory" ); aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = aes_ocb_get_l(c, n); /* Checksum_i = Checksum_{i-1} xor P_i */ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "movdqu %[l0l1], %%xmm3\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [l0l1] "m" (*c->u_mode.ocb.L0L1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm4\n\t" "movdqu %[l3], %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm1, %%xmm7\n\t" "pxor %%xmm0, %%xmm1\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)) : [l1] "m" (*c->u_mode.ocb.L[1]), [l3] "m" (*l) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm2, %%xmm7\n\t" "pxor %%xmm3, %%xmm2\n\t" "movdqa %%xmm3, %[tmpbuf1]\n\t" : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)) : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm3, %%xmm7\n\t" "pxor %%xmm0, %%xmm3\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("pxor %%xmm6, %%xmm5\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm4, %%xmm7\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); asm volatile ("pxor %[tmpbuf0],%%xmm1\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "pxor %[tmpbuf1],%%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "pxor %[tmpbuf2],%%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) : "memory" ); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Checksum_i = Checksum_{i-1} xor P_i */ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], 
%%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm0, %%xmm7\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" "movdqu %%xmm7, %[ctr]\n\t" : [iv] "=m" (*c->u_iv.iv), [ctr] "=m" (*c->u_ctr.ctr) : : "memory" ); asm volatile ("pxor %%xmm0, %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" "movdqa %%xmm0, %[tmpbuf1]\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); return 0; } -static unsigned int NO_INLINE +static unsigned int ASM_FUNC_ATTR_NOINLINE aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks_arg) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; const unsigned char *l; size_t nblocks = nblocks_arg; byte tmpbuf_store[3 * 16 + 15]; byte *tmpbuf; aesni_prepare_2_7_variable; asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory"); tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15); aesni_prepare (); aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { do_aesni_prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } /* Preload Offset */ asm volatile ("movdqu %[iv], %%xmm5\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { unsigned char last_xor_first_key_store[16 + 15]; unsigned char *lxf_key; aesni_prepare_8_15_variable; asm volatile ("" : "=r" (lxf_key) : "0" (last_xor_first_key_store) : "memory"); lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15); aesni_prepare_8_15(); asm volatile ("movdqu %[l0], %%xmm6\n\t" "movdqa %[last_key], %%xmm0\n\t" "pxor %[first_key], %%xmm5\n\t" "pxor %[first_key], %%xmm0\n\t" "movdqa %%xmm0, %[lxfkey]\n\t" : [lxfkey] "=m" (*lxf_key) : [l0] "m" (*c->u_mode.ocb.L[0]), [last_key] "m" (ctx->keyschdec[ctx->rounds][0][0]), [first_key] "m" (ctx->keyschdec[0][0][0]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[l0l1], %%xmm10\n\t" "movdqu %[l1], %%xmm11\n\t" "movdqu %[l3], %%xmm15\n\t" : : [l0l1] "m" (*c->u_mode.ocb.L0L1), [l1] "m" (*c->u_mode.ocb.L[1]), [l3] "m" (*l) : "memory" ); n += 4; l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i) */ asm volatile ("movdqu %[inbuf0], %%xmm1\n\t" "movdqu %[inbuf1], %%xmm2\n\t" "movdqu %[inbuf2], %%xmm3\n\t" : : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)), [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)), [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[inbuf3], %%xmm4\n\t" "movdqu %[inbuf4], 
%%xmm8\n\t" "movdqu %[inbuf5], %%xmm9\n\t" : : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)), [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)), [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqa %[lxfkey], %%xmm0\n\t" "movdqa %%xmm6, %%xmm12\n\t" "pxor %%xmm5, %%xmm12\n\t" "pxor %%xmm12, %%xmm1\n\t" "pxor %%xmm0, %%xmm12\n\t" "movdqa %%xmm10, %%xmm13\n\t" "pxor %%xmm5, %%xmm13\n\t" "pxor %%xmm13, %%xmm2\n\t" "pxor %%xmm0, %%xmm13\n\t" "movdqa %%xmm11, %%xmm14\n\t" "pxor %%xmm5, %%xmm14\n\t" "pxor %%xmm14, %%xmm3\n\t" "pxor %%xmm0, %%xmm14\n\t" "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm15, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqa %%xmm5, %%xmm15\n\t" "pxor %%xmm0, %%xmm15\n\t" "movdqa %%xmm5, %%xmm0\n\t" "pxor %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm8\n\t" "pxor %[lxfkey], %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" "movdqa %%xmm10, %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm9\n\t" "pxor %[lxfkey], %%xmm0\n" "movdqa %%xmm0, %[tmpbuf1]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)) : [lxfkey] "m" (*lxf_key) : "memory" ); asm volatile ("movdqu %[inbuf6], %%xmm10\n\t" "movdqa %%xmm11, %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm10\n\t" "pxor %[lxfkey], %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)), [lxfkey] "m" (*lxf_key) : "memory" ); asm volatile ("movdqu %[l7], %%xmm0\n\t" "pxor %%xmm11, %%xmm5\n\t" "pxor %%xmm0, %%xmm5\n\t" "movdqa 0x10(%[key]), %%xmm0\n\t" "movdqu %[inbuf7], %%xmm11\n\t" "pxor %%xmm5, %%xmm11\n\t" : : [l7] "m" (*l), [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)), [key] "r" (ctx->keyschdec) : "memory" ); asm volatile ("cmpl $12, %[rounds]\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x20(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x30(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x40(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x50(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x60(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x70(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 
0x80(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0x90(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "jb .Ldeclast%=\n\t" "movdqa 0xa0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xb0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "je .Ldeclast%=\n\t" "movdqa 0xc0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" "movdqa 0xd0(%[key]), %%xmm0\n\t" "aesdec %%xmm0, %%xmm1\n\t" "aesdec %%xmm0, %%xmm2\n\t" "aesdec %%xmm0, %%xmm3\n\t" "aesdec %%xmm0, %%xmm4\n\t" "aesdec %%xmm0, %%xmm8\n\t" "aesdec %%xmm0, %%xmm9\n\t" "aesdec %%xmm0, %%xmm10\n\t" "aesdec %%xmm0, %%xmm11\n\t" ".Ldeclast%=:\n\t" : : [key] "r" (ctx->keyschdec), [rounds] "r" (ctx->rounds) : "cc", "memory"); asm volatile ("aesdeclast %%xmm12, %%xmm1\n\t" "aesdeclast %%xmm13, %%xmm2\n\t" "aesdeclast %%xmm14, %%xmm3\n\t" "aesdeclast %%xmm15, %%xmm4\n\t" "aesdeclast %[tmpbuf0],%%xmm8\n\t" "aesdeclast %[tmpbuf1],%%xmm9\n\t" "aesdeclast %[tmpbuf2],%%xmm10\n\t" "aesdeclast %%xmm5, %%xmm11\n\t" "pxor %[lxfkey], %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" "movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)), [lxfkey] "m" (*lxf_key) : "memory" ); outbuf += 8*BLOCKSIZE; inbuf += 8*BLOCKSIZE; } asm volatile ("pxor %[first_key], %%xmm5\n\t" "pxor %%xmm0, %%xmm0\n\t" "movdqu %%xmm0, %[lxfkey]\n\t" : [lxfkey] "=m" (*lxf_key) : [first_key] "m" (ctx->keyschdec[0][0][0]) : "memory" ); aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* C_i = Offset_i xor DECIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "movdqu %[l0l1], %%xmm3\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [l0l1] "m" (*c->u_mode.ocb.L0L1), [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm4\n\t" "movdqu %[l3], %%xmm6\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor 
%%xmm0, %%xmm1\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)) : [l1] "m" (*c->u_mode.ocb.L[1]), [l3] "m" (*l) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm3, %%xmm2\n\t" "movdqa %%xmm3, %[tmpbuf1]\n\t" : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)) : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm3\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("pxor %%xmm6, %%xmm5\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_dec_vec4 (ctx); asm volatile ("pxor %[tmpbuf0],%%xmm1\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "pxor %[tmpbuf1],%%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "pxor %[tmpbuf2],%%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) : "memory" ); outbuf += 4*BLOCKSIZE; inbuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_aesni_dec (ctx); asm volatile ("pxor %%xmm5, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" : [iv] "=m" (*c->u_iv.iv) : : "memory" ); asm volatile ("pxor %%xmm0, %%xmm0\n\t" "movdqa %%xmm0, %[tmpbuf0]\n\t" "movdqa %%xmm0, %[tmpbuf1]\n\t" "movdqa %%xmm0, %[tmpbuf2]\n\t" : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)), [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)), [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE)) : : "memory" ); aesni_ocb_checksum (c, outbuf_arg, nblocks_arg); aesni_cleanup (); aesni_cleanup_2_7 (); return 0; } -size_t +size_t ASM_FUNC_ATTR _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { if (encrypt) return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); } -size_t +size_t ASM_FUNC_ATTR _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; const unsigned char *l; aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7 (); /* Preload Offset and Sum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_mode.ocb.aad_offset), [ctr] "m" (*c->u_mode.ocb.aad_sum) : "memory" ); for ( ;nblocks && n % 4; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" 
"movdqu %[abuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [abuf] "m" (*abuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm0, %%xmm6\n\t" : : : "memory" ); abuf += BLOCKSIZE; } #ifdef __x86_64__ if (nblocks >= 8) { aesni_prepare_8_15_variable; aesni_prepare_8_15(); asm volatile ("movdqu %[l0], %%xmm7\n\t" "movdqu %[l0l1], %%xmm12\n\t" "movdqu %[l1], %%xmm13\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [l0l1] "m" (*c->u_mode.ocb.L0L1), [l1] "m" (*c->u_mode.ocb.L[1]) : "memory" ); for ( ;nblocks >= 8 ; nblocks -= 8 ) { n += 4; l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[l3], %%xmm0\n\t" "pxor %%xmm13, %%xmm0\n\t" : : [l3] "m" (*l) : "memory" ); n += 4; l = aes_ocb_get_l(c, n); asm volatile ("movdqu %[l7], %%xmm14\n\t" "pxor %%xmm13, %%xmm14\n\t" : : [l7] "m" (*l) : "memory" ); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[abuf0], %%xmm1\n\t" "movdqu %[abuf1], %%xmm2\n\t" "movdqu %[abuf2], %%xmm3\n\t" "movdqu %[abuf3], %%xmm4\n\t" "movdqu %[abuf4], %%xmm8\n\t" "movdqu %[abuf5], %%xmm9\n\t" "movdqu %[abuf6], %%xmm10\n\t" "movdqu %[abuf7], %%xmm11\n\t" : : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)), [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)), [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)), [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)), [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)), [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)), [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)), [abuf7] "m" (*(abuf + 7 * BLOCKSIZE)) : "memory" ); asm volatile ("pxor %%xmm7, %%xmm1\n\t" "pxor %%xmm5, %%xmm1\n\t" "pxor %%xmm12, %%xmm2\n\t" "pxor %%xmm5, %%xmm2\n\t" "pxor %%xmm13, %%xmm3\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm0, %%xmm5\n\t" "pxor %%xmm5, %%xmm4\n\t" "pxor %%xmm7, %%xmm8\n\t" "pxor %%xmm5, %%xmm8\n\t" "pxor %%xmm12, %%xmm9\n\t" "pxor %%xmm5, %%xmm9\n\t" "pxor %%xmm13, %%xmm10\n\t" "pxor %%xmm5, %%xmm10\n\t" "pxor %%xmm14, %%xmm5\n\t" "pxor %%xmm5, %%xmm11\n\t" : : : "memory" ); do_aesni_enc_vec8 (ctx); asm volatile ("pxor %%xmm2, %%xmm1\n\t" "pxor %%xmm3, %%xmm1\n\t" "pxor %%xmm4, %%xmm1\n\t" "pxor %%xmm8, %%xmm1\n\t" "pxor %%xmm9, %%xmm6\n\t" "pxor %%xmm10, %%xmm6\n\t" "pxor %%xmm11, %%xmm6\n\t" "pxor %%xmm1, %%xmm6\n\t" : : : "memory" ); abuf += 8*BLOCKSIZE; } aesni_cleanup_8_15(); } #endif for ( ;nblocks >= 4 ; nblocks -= 4 ) { n += 4; l = aes_ocb_get_l(c, n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l0], %%xmm0\n\t" "movdqu %[abuf0], %%xmm1\n\t" "movdqu %[l0l1], %%xmm3\n\t" : : [l0] "m" (*c->u_mode.ocb.L[0]), [l0l1] "m" (*c->u_mode.ocb.L0L1), [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqu %[l1], %%xmm4\n\t" "movdqu %[l3], %%xmm7\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" : : [l1] "m" (*c->u_mode.ocb.L[1]), [l3] "m" (*l) : "memory" ); asm volatile ("movdqu %[abuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm3\n\t" "pxor %%xmm3, %%xmm2\n\t" : : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) : "memory" ); asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "movdqu %[abuf2], %%xmm3\n\t" "pxor %%xmm5, %%xmm0\n\t" "pxor %%xmm0, %%xmm3\n\t" : : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) : "memory" ); asm volatile ("pxor %%xmm7, %%xmm5\n\t" "pxor %%xmm4, %%xmm5\n\t" "movdqu %[abuf3], %%xmm4\n\t" "pxor %%xmm5, %%xmm4\n\t" : : [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) : "memory" ); do_aesni_enc_vec4 (ctx); asm volatile ("pxor %%xmm1, %%xmm6\n\t" "pxor %%xmm2, %%xmm6\n\t" "pxor %%xmm3, %%xmm6\n\t" "pxor %%xmm4, %%xmm6\n\t" : : : "memory" 
); abuf += 4*BLOCKSIZE; } for ( ;nblocks; nblocks-- ) { l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[abuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm5\n\t" "pxor %%xmm5, %%xmm0\n\t" : : [l] "m" (*l), [abuf] "m" (*abuf) : "memory" ); do_aesni_enc (ctx); asm volatile ("pxor %%xmm0, %%xmm6\n\t" : : : "memory" ); abuf += BLOCKSIZE; } c->u_mode.ocb.aad_nblocks = n; asm volatile ("movdqu %%xmm5, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_mode.ocb.aad_offset), [ctr] "=m" (*c->u_mode.ocb.aad_sum) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); return 0; } static const u64 xts_gfmul_const[16] __attribute__ ((aligned (16))) = { 0x87, 0x01 }; -static void +static void ASM_FUNC_ATTR _gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7 (); /* Preload Tweak */ asm volatile ("movdqu %[tweak], %%xmm5\n\t" "movdqa %[gfmul], %%xmm6\n\t" : : [tweak] "m" (*tweak), [gfmul] "m" (*xts_gfmul_const) : "memory" ); for ( ;nblocks >= 4; nblocks -= 4 ) { asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm5, %[outbuf0]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf0] "=m" (*(outbuf + 0 * 16)) : [inbuf0] "m" (*(inbuf + 0 * 16)) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm5, %[outbuf1]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf1] "=m" (*(outbuf + 1 * 16)) : [inbuf1] "m" (*(inbuf + 1 * 16)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm5, %[outbuf2]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf2] "=m" (*(outbuf + 2 * 16)) : [inbuf2] "m" (*(inbuf + 2 * 16)) : "memory" ); asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm5, %[outbuf3]\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf3] "=m" (*(outbuf + 3 * 16)) : [inbuf3] "m" (*(inbuf + 3 * 16)) : "memory" ); do_aesni_enc_vec4 (ctx); asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" "movdqu %[outbuf1], %%xmm0\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %[outbuf2], %%xmm1\n\t" "pxor %%xmm0, %%xmm2\n\t" "movdqu %[outbuf3], %%xmm0\n\t" "pxor %%xmm1, %%xmm3\n\t" "pxor %%xmm0, %%xmm4\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" : [outbuf0] "+m" (*(outbuf + 0 * 16)), [outbuf1] "+m" (*(outbuf + 1 * 16)), [outbuf2] "+m" (*(outbuf + 2 * 16)), [outbuf3] "+m" (*(outbuf + 3 * 16)) : : "memory" ); outbuf += BLOCKSIZE * 4; inbuf += BLOCKSIZE * 4; } for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "movdqa %%xmm5, %%xmm4\n\t" "pshufd $0x13, %%xmm5, %%xmm1\n\t" "psrad $31, %%xmm1\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm1\n\t" "pxor %%xmm1, %%xmm5\n\t" : : [inbuf] "m" (*inbuf) : "memory" 
); do_aesni_enc (ctx); asm volatile ("pxor %%xmm4, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[tweak]\n\t" : [tweak] "=m" (*tweak) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); } -static void +static void ASM_FUNC_ATTR _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { aesni_prepare_2_7_variable; aesni_prepare (); aesni_prepare_2_7 (); if ( !ctx->decryption_prepared ) { do_aesni_prepare_decryption ( ctx ); ctx->decryption_prepared = 1; } /* Preload Tweak */ asm volatile ("movdqu %[tweak], %%xmm5\n\t" "movdqa %[gfmul], %%xmm6\n\t" : : [tweak] "m" (*tweak), [gfmul] "m" (*xts_gfmul_const) : "memory" ); for ( ;nblocks >= 4; nblocks -= 4 ) { asm volatile ("pshufd $0x13, %%xmm5, %%xmm4\n\t" "movdqu %[inbuf0], %%xmm1\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm5, %[outbuf0]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf0] "=m" (*(outbuf + 0 * 16)) : [inbuf0] "m" (*(inbuf + 0 * 16)) : "memory" ); asm volatile ("movdqu %[inbuf1], %%xmm2\n\t" "pxor %%xmm5, %%xmm2\n\t" "movdqu %%xmm5, %[outbuf1]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf1] "=m" (*(outbuf + 1 * 16)) : [inbuf1] "m" (*(inbuf + 1 * 16)) : "memory" ); asm volatile ("movdqu %[inbuf2], %%xmm3\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm5, %[outbuf2]\n\t" "movdqa %%xmm4, %%xmm0\n\t" "paddd %%xmm4, %%xmm4\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf2] "=m" (*(outbuf + 2 * 16)) : [inbuf2] "m" (*(inbuf + 2 * 16)) : "memory" ); asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "movdqu %[inbuf3], %%xmm4\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm5, %[outbuf3]\n\t" "psrad $31, %%xmm0\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm0\n\t" "pxor %%xmm0, %%xmm5\n\t" : [outbuf3] "=m" (*(outbuf + 3 * 16)) : [inbuf3] "m" (*(inbuf + 3 * 16)) : "memory" ); do_aesni_dec_vec4 (ctx); asm volatile ("movdqu %[outbuf0], %%xmm0\n\t" "pxor %%xmm0, %%xmm1\n\t" "movdqu %[outbuf1], %%xmm0\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %[outbuf2], %%xmm1\n\t" "pxor %%xmm0, %%xmm2\n\t" "movdqu %[outbuf3], %%xmm0\n\t" "pxor %%xmm1, %%xmm3\n\t" "pxor %%xmm0, %%xmm4\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" : [outbuf0] "+m" (*(outbuf + 0 * 16)), [outbuf1] "+m" (*(outbuf + 1 * 16)), [outbuf2] "+m" (*(outbuf + 2 * 16)), [outbuf3] "+m" (*(outbuf + 3 * 16)) : : "memory" ); outbuf += BLOCKSIZE * 4; inbuf += BLOCKSIZE * 4; } for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm5, %%xmm0\n\t" "movdqa %%xmm5, %%xmm4\n\t" "pshufd $0x13, %%xmm5, %%xmm1\n\t" "psrad $31, %%xmm1\n\t" "paddq %%xmm5, %%xmm5\n\t" "pand %%xmm6, %%xmm1\n\t" "pxor %%xmm1, %%xmm5\n\t" : : [inbuf] "m" (*inbuf) : "memory" ); do_aesni_dec (ctx); asm volatile ("pxor %%xmm4, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm5, %[tweak]\n\t" : [tweak] "=m" (*tweak) : : "memory" ); aesni_cleanup (); aesni_cleanup_2_7 (); } -void +void ASM_FUNC_ATTR _gcry_aes_aesni_xts_crypt (RIJNDAEL_context 
*ctx, unsigned char *tweak, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int encrypt) { if (encrypt) _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks); else _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks); } #if __clang__ # pragma clang attribute pop #endif #endif /* USE_AESNI */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index 0c1ae6e6..b0723853 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -1,736 +1,743 @@ /* SSSE3 vector permutation AES for Libgcrypt * Copyright (C) 2014-2017 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * * * The code is based on the public domain library libvpaes version 0.5 * available at http://crypto.stanford.edu/vpaes/ and which carries * this notice: * * libvpaes: constant-time SSSE3 AES encryption and decryption. * version 0.5 * * By Mike Hamburg, Stanford University, 2009. Public domain. * I wrote essentially all of this code. I did not write the test * vectors; they are the NIST known answer tests. I hereby release all * the code and documentation here that I wrote into the public domain. * * This is an implementation of AES following my paper, * "Accelerating AES with Vector Permute Instructions" * CHES 2009; http://shiftleft.org/papers/vector_aes/ */ #include #include #include #include /* for memcmp() */ #include "types.h" /* for byte and u32 typedefs */ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" #include "./cipher-internal.h" #ifdef USE_SSSE3 #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE + + /* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l because of 'pragma target'. */ -static inline const unsigned char * +static ASM_FUNC_ATTR_INLINE const unsigned char * aes_ocb_get_l (gcry_cipher_hd_t c, u64 n) { unsigned long ntz; /* Assumes that N != 0. */ asm ("rep;bsfl %k[low], %k[ntz]\n\t" : [ntz] "=r" (ntz) : [low] "r" ((unsigned long)n) : "cc"); return c->u_mode.ocb.L[ntz]; } /* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these have custom calling convention (additional XMM parameters). 
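/* Review sketch (hypothetical helper, not part of the patch): the
 * "rep;bsfl" in aes_ocb_get_l above computes ntz(n), the number of trailing
 * zero bits of the block index, which selects the OCB offset increment
 * L_{ntz(n)}.  A portable equivalent without inline assembly:
 */
static const unsigned char *
aes_ocb_get_l_ref (gcry_cipher_hd_t c, u64 n)
{
  unsigned int ntz = 0;

  /* The caller guarantees n != 0, so the loop terminates. */
  while ((n & 1) == 0)
    {
      ntz++;
      n >>= 1;
    }

  return c->u_mode.ocb.L[ntz];
}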
*/ extern void _gcry_aes_ssse3_enc_preload(void); extern void _gcry_aes_ssse3_dec_preload(void); extern void _gcry_aes_ssse3_schedule_core(const void *key, u64 keybits, void *buffer, u64 decrypt, u64 rotoffs); extern void _gcry_aes_ssse3_encrypt_core(const void *key, u64 nrounds); extern void _gcry_aes_ssse3_decrypt_core(const void *key, u64 nrounds); /* Two macros to be called prior and after the use of SSSE3 instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the SSE registers are cleared and won't reveal any information about the key or the data. */ #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define SSSE3_STATE_SIZE (16 * 10) /* XMM6-XMM15 are callee-saved registers on WIN64. */ # define vpaes_ssse3_prepare() \ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \ "movdqu %%xmm7, 1*16(%0)\n\t" \ "movdqu %%xmm8, 2*16(%0)\n\t" \ "movdqu %%xmm9, 3*16(%0)\n\t" \ "movdqu %%xmm10, 4*16(%0)\n\t" \ "movdqu %%xmm11, 5*16(%0)\n\t" \ "movdqu %%xmm12, 6*16(%0)\n\t" \ "movdqu %%xmm13, 7*16(%0)\n\t" \ "movdqu %%xmm14, 8*16(%0)\n\t" \ "movdqu %%xmm15, 9*16(%0)\n\t" \ : \ : "r" (ssse3_state) \ : "memory" ) # define vpaes_ssse3_cleanup() \ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ "pxor %%xmm1, %%xmm1 \n\t" \ "pxor %%xmm2, %%xmm2 \n\t" \ "pxor %%xmm3, %%xmm3 \n\t" \ "pxor %%xmm4, %%xmm4 \n\t" \ "pxor %%xmm5, %%xmm5 \n\t" \ "movdqu 0*16(%0), %%xmm6 \n\t" \ "movdqu 1*16(%0), %%xmm7 \n\t" \ "movdqu 2*16(%0), %%xmm8 \n\t" \ "movdqu 3*16(%0), %%xmm9 \n\t" \ "movdqu 4*16(%0), %%xmm10 \n\t" \ "movdqu 5*16(%0), %%xmm11 \n\t" \ "movdqu 6*16(%0), %%xmm12 \n\t" \ "movdqu 7*16(%0), %%xmm13 \n\t" \ "movdqu 8*16(%0), %%xmm14 \n\t" \ "movdqu 9*16(%0), %%xmm15 \n\t" \ : \ : "r" (ssse3_state) \ : "memory" ) #else # define SSSE3_STATE_SIZE 1 # define vpaes_ssse3_prepare() (void)ssse3_state # define vpaes_ssse3_cleanup() \ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ "pxor %%xmm1, %%xmm1 \n\t" \ "pxor %%xmm2, %%xmm2 \n\t" \ "pxor %%xmm3, %%xmm3 \n\t" \ "pxor %%xmm4, %%xmm4 \n\t" \ "pxor %%xmm5, %%xmm5 \n\t" \ "pxor %%xmm6, %%xmm6 \n\t" \ "pxor %%xmm7, %%xmm7 \n\t" \ "pxor %%xmm8, %%xmm8 \n\t" \ ::: "memory" ) #endif #define vpaes_ssse3_prepare_enc() \ vpaes_ssse3_prepare(); \ _gcry_aes_ssse3_enc_preload(); #define vpaes_ssse3_prepare_dec() \ vpaes_ssse3_prepare(); \ _gcry_aes_ssse3_dec_preload(); -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare(); _gcry_aes_ssse3_schedule_core(key, keybits, &ctx->keyschenc32[0][0], 0, 48); /* Save key for setting up decryption. */ if (keybits > 192) asm volatile ("movdqu (%[src]), %%xmm0\n\t" "movdqu 16(%[src]), %%xmm1\n\t" "movdqu %%xmm0, (%[dst])\n\t" "movdqu %%xmm1, 16(%[dst])\n\t" : /* No output */ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) : "memory" ); else if (keybits == 192) asm volatile ("movdqu (%[src]), %%xmm0\n\t" "movq 16(%[src]), %%xmm1\n\t" "movdqu %%xmm0, (%[dst])\n\t" "movq %%xmm1, 16(%[dst])\n\t" : /* No output */ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) : "memory" ); else asm volatile ("movdqu (%[src]), %%xmm0\n\t" "movdqu %%xmm0, (%[dst])\n\t" : /* No output */ : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key) : "memory" ); vpaes_ssse3_cleanup(); } /* Make a decryption key from an encryption key. 
*/ -static inline void +static ASM_FUNC_ATTR_INLINE void do_ssse3_prepare_decryption (RIJNDAEL_context *ctx, byte ssse3_state[SSSE3_STATE_SIZE]) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; vpaes_ssse3_prepare(); _gcry_aes_ssse3_schedule_core(&ctx->keyschdec32[0][0], keybits, &ctx->keyschdec32[ctx->rounds][0], 1, (keybits == 192) ? 0 : 32); vpaes_ssse3_cleanup(); } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) { byte ssse3_state[SSSE3_STATE_SIZE]; do_ssse3_prepare_decryption(ctx, ssse3_state); } /* Encrypt one block using the Intel SSSE3 instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ASM_FUNC_ATTR_INLINE void do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds) { _gcry_aes_ssse3_encrypt_core(ctx->keyschenc32, nrounds); } /* Decrypt one block using the Intel SSSE3 instructions. Block is input * and output through SSE register xmm0. */ -static inline void +static ASM_FUNC_ATTR_INLINE void do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds) { _gcry_aes_ssse3_decrypt_core(ctx->keyschdec32, nrounds); } -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); vpaes_ssse3_cleanup (); return 0; } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); asm volatile ("movdqu %[iv], %%xmm0\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("movdqu %[inbuf], %%xmm1\n\t" "pxor %%xmm1, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); vpaes_ssse3_cleanup (); } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks, int cbc_mac) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); asm volatile ("movdqu %[iv], %%xmm7\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm7, %%xmm0\n\t" : /* No output */ : [inbuf] "m" (*inbuf) : "memory" ); do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("movdqa %%xmm0, %%xmm7\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; if (!cbc_mac) outbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm7, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); vpaes_ssse3_cleanup (); } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; u64 ctrlow; 
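/* Review sketch (hypothetical helper, not part of the patch): the counter
 * handling below keeps the 16-byte CTR value big endian and byte-swaps only
 * the low quadword into a register to detect 64-bit carry.  The plain-C
 * behaviour it implements is an ordinary big-endian increment:
 */
static void
ctr_increment_ref (unsigned char ctr[16])
{
  int i;

  for (i = 15; i >= 0; i--)
    if (++ctr[i] != 0)  /* stop once a byte did not wrap to zero */
      break;
}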
vpaes_ssse3_prepare_enc (); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa (%[ctr]), %%xmm7\n\t" /* Preload CTR */ "movq 8(%[ctr]), %q[ctrlow]\n\t" "bswapq %q[ctrlow]\n\t" : [ctrlow] "=r" (ctrlow) : [mask] "m" (*be_mask), [ctr] "r" (ctr) : "memory", "cc"); for ( ;nblocks; nblocks-- ) { asm volatile ("movdqa %%xmm7, %%xmm0\n\t" /* xmm0 := CTR (xmm7) */ "pcmpeqd %%xmm1, %%xmm1\n\t" "psrldq $8, %%xmm1\n\t" /* xmm1 = -1 */ "pshufb %%xmm6, %%xmm7\n\t" "psubq %%xmm1, %%xmm7\n\t" /* xmm7++ (big endian) */ /* detect if 64-bit carry handling is needed */ "incq %q[ctrlow]\n\t" "jnz .Lno_carry%=\n\t" "pslldq $8, %%xmm1\n\t" /* move lower 64-bit to high */ "psubq %%xmm1, %%xmm7\n\t" /* add carry to upper 64bits */ ".Lno_carry%=:\n\t" "pshufb %%xmm6, %%xmm7\n\t" : [ctrlow] "+r" (ctrlow) : : "cc", "memory"); do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ : [dst] "=m" (*outbuf) : [src] "m" (*inbuf) : "memory"); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm7, %[ctr]\n\t" /* Update CTR (mem). */ : [ctr] "=m" (*ctr) : : "memory" ); vpaes_ssse3_cleanup (); } -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_dec (); asm volatile ("movdqu %[src], %%xmm0\n\t" : : [src] "m" (*src) : "memory" ); do_vpaes_ssse3_dec (ctx, nrounds); asm volatile ("movdqu %%xmm0, %[dst]\n\t" : [dst] "=m" (*dst) : : "memory" ); vpaes_ssse3_cleanup (); return 0; } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); asm volatile ("movdqu %[iv], %%xmm0\n\t" : /* No output */ : [iv] "m" (*iv) : "memory" ); for ( ;nblocks; nblocks-- ) { do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("movdqa %%xmm0, %%xmm6\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm6, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : [inbuf] "m" (*inbuf) : "memory" ); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm0, %[iv]\n\t" : [iv] "=m" (*iv) : : "memory" ); vpaes_ssse3_cleanup (); } -void +void ASM_FUNC_ATTR _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, unsigned char *outbuf, const unsigned char *inbuf, size_t nblocks) { unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; if ( !ctx->decryption_prepared ) { do_ssse3_prepare_decryption ( ctx, ssse3_state ); ctx->decryption_prepared = 1; } vpaes_ssse3_prepare_dec (); asm volatile ("movdqu %[iv], %%xmm7\n\t" /* use xmm7 as fast IV storage */ : /* No output */ : [iv] "m" (*iv) : "memory"); for ( ;nblocks; nblocks-- ) { asm volatile ("movdqu %[inbuf], %%xmm0\n\t" "movdqa %%xmm0, %%xmm6\n\t" /* use xmm6 as savebuf */ : /* No output */ : [inbuf] "m" (*inbuf) : "memory"); do_vpaes_ssse3_dec (ctx, nrounds); asm volatile ("pxor %%xmm7, %%xmm0\n\t" /* xor IV with output */ "movdqu %%xmm0, %[outbuf]\n\t" "movdqu %%xmm6, %%xmm7\n\t" /* store savebuf as new IV */ : [outbuf] "=m" (*outbuf) : : "memory"); outbuf += BLOCKSIZE; inbuf += BLOCKSIZE; } asm volatile ("movdqu %%xmm7, %[iv]\n\t" /* store IV */ : /* No output */ : [iv] "m" (*iv) : "memory"); vpaes_ssse3_cleanup (); 
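/* Review sketch (hypothetical names, not part of the patch): the xmm6/xmm7
 * "savebuf"/"fast IV" juggling above is plain CBC decryption chaining,
 * P_i = Decrypt(K, C_i) XOR C_{i-1}, with the last ciphertext block kept as
 * the next IV.  Using a placeholder block_decrypt callback:
 */
static void
cbc_dec_ref (void *key, unsigned char *iv, unsigned char *outbuf,
             const unsigned char *inbuf, size_t nblocks,
             void (*block_decrypt) (void *key, unsigned char *dst,
                                    const unsigned char *src))
{
  unsigned char savebuf[16];
  unsigned int i;

  for (; nblocks; nblocks--, inbuf += 16, outbuf += 16)
    {
      memcpy (savebuf, inbuf, 16);       /* keep C_i; outbuf may alias inbuf */
      block_decrypt (key, outbuf, inbuf);
      for (i = 0; i < 16; i++)
        outbuf[i] ^= iv[i];              /* P_i = Decrypt(K, C_i) XOR C_{i-1} */
      memcpy (iv, savebuf, 16);          /* chain C_i as the next IV */
    }
}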
} -static void +static void ASM_FUNC_ATTR ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm7\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv), [ctr] "m" (*c->u_ctr.ctr) : "memory" ); for ( ;nblocks; nblocks-- ) { const unsigned char *l; l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Checksum_i = Checksum_{i-1} xor P_i */ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm7\n\t" "pxor %%xmm0, %%xmm6\n\t" "pxor %%xmm7, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("pxor %%xmm7, %%xmm0\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm7, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_iv.iv), [ctr] "=m" (*c->u_ctr.ctr) : : "memory" ); vpaes_ssse3_cleanup (); } -static void +static void ASM_FUNC_ATTR ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; if ( !ctx->decryption_prepared ) { do_ssse3_prepare_decryption ( ctx, ssse3_state ); ctx->decryption_prepared = 1; } vpaes_ssse3_prepare_dec (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm7\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_iv.iv), [ctr] "m" (*c->u_ctr.ctr) : "memory" ); for ( ;nblocks; nblocks-- ) { const unsigned char *l; l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ /* Checksum_i = Checksum_{i-1} xor P_i */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[inbuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm7\n\t" "pxor %%xmm7, %%xmm0\n\t" : : [l] "m" (*l), [inbuf] "m" (*inbuf) : "memory" ); do_vpaes_ssse3_dec (ctx, nrounds); asm volatile ("pxor %%xmm7, %%xmm0\n\t" "pxor %%xmm0, %%xmm6\n\t" "movdqu %%xmm0, %[outbuf]\n\t" : [outbuf] "=m" (*outbuf) : : "memory" ); inbuf += BLOCKSIZE; outbuf += BLOCKSIZE; } c->u_mode.ocb.data_nblocks = n; asm volatile ("movdqu %%xmm7, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_iv.iv), [ctr] "=m" (*c->u_ctr.ctr) : : "memory" ); vpaes_ssse3_cleanup (); } -size_t +size_t ASM_FUNC_ATTR _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { if (encrypt) ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); else ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); return 0; } -size_t +size_t ASM_FUNC_ATTR _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; unsigned int nrounds = ctx->rounds; byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (); /* Preload Offset and Sum */ asm 
volatile ("movdqu %[iv], %%xmm7\n\t" "movdqu %[ctr], %%xmm6\n\t" : /* No output */ : [iv] "m" (*c->u_mode.ocb.aad_offset), [ctr] "m" (*c->u_mode.ocb.aad_sum) : "memory" ); for ( ;nblocks; nblocks-- ) { const unsigned char *l; l = aes_ocb_get_l(c, ++n); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ asm volatile ("movdqu %[l], %%xmm1\n\t" "movdqu %[abuf], %%xmm0\n\t" "pxor %%xmm1, %%xmm7\n\t" "pxor %%xmm7, %%xmm0\n\t" : : [l] "m" (*l), [abuf] "m" (*abuf) : "memory" ); do_vpaes_ssse3_enc (ctx, nrounds); asm volatile ("pxor %%xmm0, %%xmm6\n\t" : : : "memory" ); abuf += BLOCKSIZE; } c->u_mode.ocb.aad_nblocks = n; asm volatile ("movdqu %%xmm7, %[iv]\n\t" "movdqu %%xmm6, %[ctr]\n\t" : [iv] "=m" (*c->u_mode.ocb.aad_offset), [ctr] "=m" (*c->u_mode.ocb.aad_sum) : : "memory" ); vpaes_ssse3_cleanup (); return 0; } #if __clang__ # pragma clang attribute pop #endif #endif /* USE_SSSE3 */ diff --git a/cipher/sha1-intel-shaext.c b/cipher/sha1-intel-shaext.c index d7e3d4f8..ddf2be2a 100644 --- a/cipher/sha1-intel-shaext.c +++ b/cipher/sha1-intel-shaext.c @@ -1,288 +1,292 @@ /* sha1-intel-shaext.S - SHAEXT accelerated SHA-1 transform function * Copyright (C) 2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include "types.h" #if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \ defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA1) && \ defined(ENABLE_SHAEXT_SUPPORT) #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION + /* Two macros to be called prior and after the use of SHA-EXT instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the SSE regsiters are cleared and won't reveal any information about the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. 
*/ # define shaext_prepare_variable char win64tmp[2*16] # define shaext_prepare_variable_size sizeof(win64tmp) # define shaext_prepare() \ do { asm volatile ("movdqu %%xmm6, (%0)\n" \ "movdqu %%xmm7, (%1)\n" \ : \ : "r" (&win64tmp[0]), "r" (&win64tmp[16]) \ : "memory"); \ } while (0) # define shaext_cleanup(tmp0,tmp1) \ do { asm volatile ("movdqu (%0), %%xmm6\n" \ "movdqu (%1), %%xmm7\n" \ "pxor %%xmm0, %%xmm0\n" \ "pxor %%xmm1, %%xmm1\n" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "movdqa %%xmm0, (%2)\n\t" \ "movdqa %%xmm0, (%3)\n\t" \ : \ : "r" (&win64tmp[0]), "r" (&win64tmp[16]), \ "r" (tmp0), "r" (tmp1) \ : "memory"); \ } while (0) #else # define shaext_prepare_variable # define shaext_prepare_variable_size 0 # define shaext_prepare() do { } while (0) # define shaext_cleanup(tmp0,tmp1) \ do { asm volatile ("pxor %%xmm0, %%xmm0\n" \ "pxor %%xmm1, %%xmm1\n" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n" \ "pxor %%xmm7, %%xmm7\n" \ "movdqa %%xmm0, (%0)\n\t" \ "movdqa %%xmm0, (%1)\n\t" \ : \ : "r" (tmp0), "r" (tmp1) \ : "memory"); \ } while (0) #endif /* * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. */ -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data, size_t nblks) { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; char save_buf[2 * 16 + 15]; char *abcd_save; char *e_save; shaext_prepare_variable; if (nblks == 0) return 0; shaext_prepare (); asm volatile ("" : "=r" (abcd_save) : "0" (save_buf) : "memory"); abcd_save = abcd_save + (-(uintptr_t)abcd_save & 15); e_save = abcd_save + 16; /* byteswap mask => XMM7 */ asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */ : : [mask] "m" (*be_mask) : "memory"); /* Load state.. 
ABCD => XMM4, E => XMM5 */ asm volatile ("movd 16(%[state]), %%xmm5\n\t" "movdqu (%[state]), %%xmm4\n\t" "pslldq $12, %%xmm5\n\t" "pshufd $0x1b, %%xmm4, %%xmm4\n\t" "movdqa %%xmm5, (%[e_save])\n\t" "movdqa %%xmm4, (%[abcd_save])\n\t" : : [state] "r" (state), [abcd_save] "r" (abcd_save), [e_save] "r" (e_save) : "memory" ); /* DATA => XMM[0..4] */ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t" "movdqu 16(%[data]), %%xmm1\n\t" "movdqu 32(%[data]), %%xmm2\n\t" "movdqu 48(%[data]), %%xmm3\n\t" "pshufb %%xmm7, %%xmm0\n\t" "pshufb %%xmm7, %%xmm1\n\t" "pshufb %%xmm7, %%xmm2\n\t" "pshufb %%xmm7, %%xmm3\n\t" : : [data] "r" (data) : "memory" ); data += 64; while (1) { /* Round 0..3 */ asm volatile ("paddd %%xmm0, %%xmm5\n\t" "movdqa %%xmm4, %%xmm6\n\t" /* ABCD => E1 */ "sha1rnds4 $0, %%xmm5, %%xmm4\n\t" ::: "memory" ); /* Round 4..7 */ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t" "movdqa %%xmm4, %%xmm5\n\t" "sha1rnds4 $0, %%xmm6, %%xmm4\n\t" "sha1msg1 %%xmm1, %%xmm0\n\t" ::: "memory" ); /* Round 8..11 */ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t" "movdqa %%xmm4, %%xmm6\n\t" "sha1rnds4 $0, %%xmm5, %%xmm4\n\t" "sha1msg1 %%xmm2, %%xmm1\n\t" "pxor %%xmm2, %%xmm0\n\t" ::: "memory" ); #define ROUND(imm, E0, E1, MSG0, MSG1, MSG2, MSG3) \ asm volatile ("sha1nexte %%"MSG0", %%"E0"\n\t" \ "movdqa %%xmm4, %%"E1"\n\t" \ "sha1msg2 %%"MSG0", %%"MSG1"\n\t" \ "sha1rnds4 $"imm", %%"E0", %%xmm4\n\t" \ "sha1msg1 %%"MSG0", %%"MSG3"\n\t" \ "pxor %%"MSG0", %%"MSG2"\n\t" \ ::: "memory" ) /* Rounds 12..15 to 64..67 */ ROUND("0", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2"); ROUND("0", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3"); ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0"); ROUND("1", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1"); ROUND("1", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2"); ROUND("1", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3"); ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0"); ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1"); ROUND("2", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2"); ROUND("2", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3"); ROUND("2", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0"); ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1"); ROUND("3", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2"); ROUND("3", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3"); if (--nblks == 0) break; /* Round 68..71 */ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t" "sha1nexte %%xmm1, %%xmm6\n\t" "movdqa %%xmm4, %%xmm5\n\t" "sha1msg2 %%xmm1, %%xmm2\n\t" "sha1rnds4 $3, %%xmm6, %%xmm4\n\t" "pxor %%xmm1, %%xmm3\n\t" "pshufb %%xmm7, %%xmm0\n\t" : : [data] "r" (data) : "memory" ); /* Round 72..75 */ asm volatile ("movdqu 16(%[data]), %%xmm1\n\t" "sha1nexte %%xmm2, %%xmm5\n\t" "movdqa %%xmm4, %%xmm6\n\t" "sha1msg2 %%xmm2, %%xmm3\n\t" "sha1rnds4 $3, %%xmm5, %%xmm4\n\t" "pshufb %%xmm7, %%xmm1\n\t" : : [data] "r" (data) : "memory" ); /* Round 76..79 */ asm volatile ("movdqu 32(%[data]), %%xmm2\n\t" "sha1nexte %%xmm3, %%xmm6\n\t" "movdqa %%xmm4, %%xmm5\n\t" "sha1rnds4 $3, %%xmm6, %%xmm4\n\t" "pshufb %%xmm7, %%xmm2\n\t" : : [data] "r" (data) : "memory" ); /* Merge states, store current. 
*/ asm volatile ("movdqu 48(%[data]), %%xmm3\n\t" "sha1nexte (%[e_save]), %%xmm5\n\t" "paddd (%[abcd_save]), %%xmm4\n\t" "pshufb %%xmm7, %%xmm3\n\t" "movdqa %%xmm5, (%[e_save])\n\t" "movdqa %%xmm4, (%[abcd_save])\n\t" : : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save), [data] "r" (data) : "memory" ); data += 64; } /* Round 68..71 */ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t" "movdqa %%xmm4, %%xmm5\n\t" "sha1msg2 %%xmm1, %%xmm2\n\t" "sha1rnds4 $3, %%xmm6, %%xmm4\n\t" "pxor %%xmm1, %%xmm3\n\t" ::: "memory" ); /* Round 72..75 */ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t" "movdqa %%xmm4, %%xmm6\n\t" "sha1msg2 %%xmm2, %%xmm3\n\t" "sha1rnds4 $3, %%xmm5, %%xmm4\n\t" ::: "memory" ); /* Round 76..79 */ asm volatile ("sha1nexte %%xmm3, %%xmm6\n\t" "movdqa %%xmm4, %%xmm5\n\t" "sha1rnds4 $3, %%xmm6, %%xmm4\n\t" ::: "memory" ); /* Merge states. */ asm volatile ("sha1nexte (%[e_save]), %%xmm5\n\t" "paddd (%[abcd_save]), %%xmm4\n\t" : : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save) : "memory" ); /* Save state */ asm volatile ("pshufd $0x1b, %%xmm4, %%xmm4\n\t" "psrldq $12, %%xmm5\n\t" "movdqu %%xmm4, (%[state])\n\t" "movd %%xmm5, 16(%[state])\n\t" : : [state] "r" (state) : "memory" ); shaext_cleanup (abcd_save, e_save); return 0; } #if __clang__ # pragma clang attribute pop #endif #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ diff --git a/cipher/sha256-intel-shaext.c b/cipher/sha256-intel-shaext.c index 2eda42d8..48c09eef 100644 --- a/cipher/sha256-intel-shaext.c +++ b/cipher/sha256-intel-shaext.c @@ -1,359 +1,363 @@ /* sha256-intel-shaext.S - SHAEXT accelerated SHA-256 transform function * Copyright (C) 2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include "types.h" #if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \ defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA256) && \ defined(ENABLE_SHAEXT_SUPPORT) #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ # pragma GCC target("no-sse") #endif #if __clang__ # pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) #endif +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION + /* Two macros to be called prior and after the use of SHA-EXT instructions. There should be no external function calls between the use of these macros. There purpose is to make sure that the SSE regsiters are cleared and won't reveal any information about the key or the data. */ #ifdef __WIN64__ /* XMM6-XMM15 are callee-saved registers on WIN64. 
*/ # define shaext_prepare_variable char win64tmp[2*16] # define shaext_prepare_variable_size sizeof(win64tmp) # define shaext_prepare() \ do { asm volatile ("movdqu %%xmm6, (%0)\n" \ "movdqu %%xmm7, (%1)\n" \ : \ : "r" (&win64tmp[0]), "r" (&win64tmp[16]) \ : "memory"); \ } while (0) # define shaext_cleanup(tmp0,tmp1) \ do { asm volatile ("movdqu (%0), %%xmm6\n" \ "movdqu (%1), %%xmm7\n" \ "pxor %%xmm0, %%xmm0\n" \ "pxor %%xmm1, %%xmm1\n" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "movdqa %%xmm0, (%2)\n\t" \ "movdqa %%xmm0, (%3)\n\t" \ : \ : "r" (&win64tmp[0]), "r" (&win64tmp[16]), \ "r" (tmp0), "r" (tmp1) \ : "memory"); \ } while (0) #else # define shaext_prepare_variable # define shaext_prepare_variable_size 0 # define shaext_prepare() do { } while (0) # define shaext_cleanup(tmp0,tmp1) \ do { asm volatile ("pxor %%xmm0, %%xmm0\n" \ "pxor %%xmm1, %%xmm1\n" \ "pxor %%xmm2, %%xmm2\n" \ "pxor %%xmm3, %%xmm3\n" \ "pxor %%xmm4, %%xmm4\n" \ "pxor %%xmm5, %%xmm5\n" \ "pxor %%xmm6, %%xmm6\n" \ "pxor %%xmm7, %%xmm7\n" \ "movdqa %%xmm0, (%0)\n\t" \ "movdqa %%xmm0, (%1)\n\t" \ : \ : "r" (tmp0), "r" (tmp1) \ : "memory"); \ } while (0) #endif typedef struct u128_s { u32 a, b, c, d; } u128_t; /* * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. */ -unsigned int +unsigned int ASM_FUNC_ATTR _gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *data, size_t nblks) { static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; static const u128_t K[16] __attribute__ ((aligned (16))) = { { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 }, { 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 }, { 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 }, { 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 }, { 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc }, { 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da }, { 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 }, { 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 }, { 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 }, { 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 }, { 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 }, { 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 }, { 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 }, { 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 }, { 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 }, { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 } }; char save_buf[2 * 16 + 15]; char *abef_save; char *cdgh_save; shaext_prepare_variable; if (nblks == 0) return 0; shaext_prepare (); asm volatile ("" : "=r" (abef_save) : "0" (save_buf) : "memory"); abef_save = abef_save + (-(uintptr_t)abef_save & 15); cdgh_save = abef_save + 16; /* byteswap mask => XMM7 */ asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */ : : [mask] "m" (*bshuf_mask) : "memory"); /* Load state.. 
ABEF_SAVE => STATE0 XMM1, CDGH_STATE => STATE1 XMM2 */ asm volatile ("movups 16(%[state]), %%xmm1\n\t" /* HGFE (xmm=EFGH) */ "movups 0(%[state]), %%xmm0\n\t" /* DCBA (xmm=ABCD) */ "movaps %%xmm1, %%xmm2\n\t" "shufps $0x11, %%xmm0, %%xmm1\n\t" /* ABEF (xmm=FEBA) */ "shufps $0xbb, %%xmm0, %%xmm2\n\t" /* CDGH (xmm=HGDC) */ : : [state] "r" (state) : "memory" ); /* Load message */ asm volatile ("movdqu 0*16(%[data]), %%xmm3\n\t" "movdqu 1*16(%[data]), %%xmm4\n\t" "movdqu 2*16(%[data]), %%xmm5\n\t" "movdqu 3*16(%[data]), %%xmm6\n\t" "pshufb %%xmm7, %%xmm3\n\t" "pshufb %%xmm7, %%xmm4\n\t" "pshufb %%xmm7, %%xmm5\n\t" "pshufb %%xmm7, %%xmm6\n\t" : : [data] "r" (data) : "memory" ); data += 64; do { /* Save state */ asm volatile ("movdqa %%xmm1, (%[abef_save])\n\t" "movdqa %%xmm2, (%[cdgh_save])\n\t" : : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save) : "memory" ); /* Round 0..3 */ asm volatile ("movdqa %%xmm3, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[0].a) : "memory" ); /* Round 4..7 */ asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" "sha256msg1 %%xmm4, %%xmm3\n\t" : : [constants] "m" (K[1].a) : "memory" ); /* Round 8..11 */ asm volatile ("movdqa %%xmm5, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" "sha256msg1 %%xmm5, %%xmm4\n\t" : : [constants] "m" (K[2].a) : "memory" ); #define ROUND(k, MSG0, MSG1, MSG2, MSG3) \ asm volatile ("movdqa %%"MSG0", %%xmm0\n\t" \ "paddd %[constants], %%xmm0\n\t" \ "sha256rnds2 %%xmm1, %%xmm2\n\t" \ "movdqa %%"MSG0", %%xmm7\n\t" \ "palignr $4, %%"MSG3", %%xmm7\n\t" \ "paddd %%xmm7, %%"MSG1"\n\t" \ "sha256msg2 %%"MSG0", %%"MSG1"\n\t" \ "psrldq $8, %%xmm0\n\t" \ "sha256rnds2 %%xmm2, %%xmm1\n\t" \ "sha256msg1 %%"MSG0", %%"MSG3"\n\t" \ : \ : [constants] "m" (K[k].a) \ : "memory" ) /* Rounds 12..15 to 48..51 */ ROUND(3, "xmm6", "xmm3", "xmm4", "xmm5"); ROUND(4, "xmm3", "xmm4", "xmm5", "xmm6"); ROUND(5, "xmm4", "xmm5", "xmm6", "xmm3"); ROUND(6, "xmm5", "xmm6", "xmm3", "xmm4"); ROUND(7, "xmm6", "xmm3", "xmm4", "xmm5"); ROUND(8, "xmm3", "xmm4", "xmm5", "xmm6"); ROUND(9, "xmm4", "xmm5", "xmm6", "xmm3"); ROUND(10, "xmm5", "xmm6", "xmm3", "xmm4"); ROUND(11, "xmm6", "xmm3", "xmm4", "xmm5"); ROUND(12, "xmm3", "xmm4", "xmm5", "xmm6"); if (--nblks == 0) break; /* Round 52..55 */ asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "movdqa %%xmm4, %%xmm7\n\t" "palignr $4, %%xmm3, %%xmm7\n\t" "movdqu 0*16(%[data]), %%xmm3\n\t" "paddd %%xmm7, %%xmm5\n\t" "sha256msg2 %%xmm4, %%xmm5\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[13].a), [data] "r" (data) : "memory" ); /* Round 56..59 */ asm volatile ("movdqa %%xmm5, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "movdqa %%xmm5, %%xmm7\n\t" "palignr $4, %%xmm4, %%xmm7\n\t" "movdqu 1*16(%[data]), %%xmm4\n\t" "paddd %%xmm7, %%xmm6\n\t" "movdqa %[mask], %%xmm7\n\t" /* Reload mask */ "sha256msg2 %%xmm5, %%xmm6\n\t" "movdqu 2*16(%[data]), %%xmm5\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask), [data] "r" (data) : "memory" ); /* Round 60..63 */ asm volatile ("movdqa %%xmm6, %%xmm0\n\t" "pshufb %%xmm7, %%xmm3\n\t" "movdqu 
3*16(%[data]), %%xmm6\n\t" "paddd %[constants], %%xmm0\n\t" "pshufb %%xmm7, %%xmm4\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "psrldq $8, %%xmm0\n\t" "pshufb %%xmm7, %%xmm5\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[15].a), [data] "r" (data) : "memory" ); data += 64; /* Merge states */ asm volatile ("paddd (%[abef_save]), %%xmm1\n\t" "paddd (%[cdgh_save]), %%xmm2\n\t" "pshufb %%xmm7, %%xmm6\n\t" : : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save) : "memory" ); } while (1); /* Round 52..55 */ asm volatile ("movdqa %%xmm4, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "movdqa %%xmm4, %%xmm7\n\t" "palignr $4, %%xmm3, %%xmm7\n\t" "paddd %%xmm7, %%xmm5\n\t" "sha256msg2 %%xmm4, %%xmm5\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[13].a) : "memory" ); /* Round 56..59 */ asm volatile ("movdqa %%xmm5, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "movdqa %%xmm5, %%xmm7\n\t" "palignr $4, %%xmm4, %%xmm7\n\t" "paddd %%xmm7, %%xmm6\n\t" "movdqa %[mask], %%xmm7\n\t" /* Reload mask */ "sha256msg2 %%xmm5, %%xmm6\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask) : "memory" ); /* Round 60..63 */ asm volatile ("movdqa %%xmm6, %%xmm0\n\t" "paddd %[constants], %%xmm0\n\t" "sha256rnds2 %%xmm1, %%xmm2\n\t" "psrldq $8, %%xmm0\n\t" "sha256rnds2 %%xmm2, %%xmm1\n\t" : : [constants] "m" (K[15].a) : "memory" ); /* Merge states */ asm volatile ("paddd (%[abef_save]), %%xmm1\n\t" "paddd (%[cdgh_save]), %%xmm2\n\t" : : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save) : "memory" ); /* Save state (XMM1=FEBA, XMM2=HGDC) */ asm volatile ("movaps %%xmm1, %%xmm0\n\t" "shufps $0x11, %%xmm2, %%xmm1\n\t" /* xmm=ABCD */ "shufps $0xbb, %%xmm2, %%xmm0\n\t" /* xmm=EFGH */ "movups %%xmm1, 16(%[state])\n\t" "movups %%xmm0, 0(%[state])\n\t" : : [state] "r" (state) : "memory" ); shaext_cleanup (abef_save, cdgh_save); return 0; } #if __clang__ # pragma clang attribute pop #endif #endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */ diff --git a/configure.ac b/configure.ac index c9cbdefc..af68e61b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,2818 +1,2829 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2017 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ(2.60) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. 
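The NO_INSTRUMENT_FUNCTION / ASM_FUNC_ATTR definitions added to the SHA-EXT modules above complement the Makefile munging: -fprofile-* and -fsanitize=* make the compiler insert calls (profiling hooks, sanitizer checks) between statements, and any such call made while hash state lives in XMM registers can clobber it, because the surrounding asm blocks declare no XMM clobbers. A minimal sketch of the attribute's use, not taken from the patch (transform_block is a hypothetical name):

    #include <stddef.h>

    /* Keep -finstrument-functions/profiling hooks out of this function so no
       compiler-inserted call runs between the asm statements and clobbers
       XMM-resident state.  Sketch only. */
    unsigned int __attribute__ ((no_instrument_function))
    transform_block (void *state, const unsigned char *data, size_t nblks)
    {
      while (nblks--)
        {
          asm volatile ("movdqu %[in], %%xmm0\n\t"   /* block -> xmm0 */
                        : : [in] "m" (*data) : "memory", "xmm0");
          /* ... rounds kept entirely in XMM registers ... */
          data += 64;
        }
      (void)state;
      return 0;
    }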
m4_define([mym4_package],[libgcrypt]) m4_define([mym4_major], [1]) m4_define([mym4_minor], [9]) m4_define([mym4_micro], [0]) # Below is m4 magic to extract and compute the git revision number, # the decimalized short revision number, a beta version string and a # flag indicating a development version (mym4_isbeta). Note that the # m4 processing is done by autoconf and not during the configure run. m4_define([mym4_verslist], m4_split(m4_esyscmd([./autogen.sh --find-version] \ mym4_package mym4_major mym4_minor mym4_micro),[:])) m4_define([mym4_isbeta], m4_argn(2, mym4_verslist)) m4_define([mym4_version], m4_argn(4, mym4_verslist)) m4_define([mym4_revision], m4_argn(7, mym4_verslist)) m4_define([mym4_revision_dec], m4_argn(8, mym4_verslist)) m4_esyscmd([echo ]mym4_version[>VERSION]) AC_INIT([mym4_package],[mym4_version], [https://bugs.gnupg.org]) # LT Version numbers, remember to change them just *before* a release. # (Code changed: REVISION++) # (Interfaces added/removed/changed: CURRENT++, REVISION=0) # (Interfaces added: AGE++) # (Interfaces removed: AGE=0) # # (Interfaces removed: CURRENT++, AGE=0, REVISION=0) # (Interfaces added: CURRENT++, AGE++, REVISION=0) # (No interfaces changed: REVISION++) LIBGCRYPT_LT_CURRENT=23 LIBGCRYPT_LT_AGE=3 LIBGCRYPT_LT_REVISION=0 ################################################ AC_SUBST(LIBGCRYPT_LT_CURRENT) AC_SUBST(LIBGCRYPT_LT_AGE) AC_SUBST(LIBGCRYPT_LT_REVISION) # If the API is changed in an incompatible way: increment the next counter. # # 1.6: ABI and API change but the change is to most users irrelevant # and thus the API version number has not been incremented. LIBGCRYPT_CONFIG_API_VERSION=1 # If you change the required gpg-error version, please remove # unnecessary error code defines in src/gcrypt-int.h. NEED_GPG_ERROR_VERSION=1.25 AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADER(config.h) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. */ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) ###################### ## Basic checks. ### (we need some results later on (e.g. 
$GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_ISC_POSIX AC_PROG_INSTALL AC_PROG_AWK AC_GNU_SOURCE # Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE dnl Under Linux, make sure that the old dtags are used if LD_LIBRARY_PATH dnl is defined. The issue is that with the new dtags, LD_LIBRARY_PATH has dnl the precedence over the run path, so that if a compatible MPFR library dnl is installed in some directory from $LD_LIBRARY_PATH, then the tested dnl MPFR library will be this library instead of the MPFR library from the dnl build tree. Other OS with the same issue might be added later. dnl dnl References: dnl https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=859732 dnl http://lists.gnu.org/archive/html/libtool/2017-05/msg00000.html dnl dnl We need to check whether --disable-new-dtags is supported as alternate dnl linkers may be used (e.g., with tcc: CC=tcc LD=tcc). dnl case $host in *-*-linux*) if test -n "$LD_LIBRARY_PATH"; then saved_LDFLAGS="$LDFLAGS" LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags" LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE" AC_MSG_CHECKING(whether --disable-new-dtags is supported by the linker) AC_LINK_IFELSE([AC_LANG_SOURCE([[ int main (void) { return 0; } ]])], [AC_MSG_RESULT(yes (use it since LD_LIBRARY_PATH is set))], [AC_MSG_RESULT(no) LDADD_FOR_TESTS_KLUDGE="" ]) LDFLAGS="$saved_LDFLAGS" fi ;; esac AC_SUBST([LDADD_FOR_TESTS_KLUDGE]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_major \ mym4_minor mym4_micro) AC_SUBST(VERSION_NUMBER) # We need to compile and run a program on the build machine. A # comment in libgpg-error says that the AC_PROG_CC_FOR_BUILD macro in # the AC archive is broken for autoconf 2.57. Given that there is no # newer version of that macro, we assume that it is also broken for # autoconf 2.61 and thus we use a simple but usually sufficient # approach. AC_MSG_CHECKING(for cc for build) if test "$cross_compiling" = "yes"; then CC_FOR_BUILD="${CC_FOR_BUILD-cc}" else CC_FOR_BUILD="${CC_FOR_BUILD-$CC}" fi AC_MSG_RESULT($CC_FOR_BUILD) AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler]) LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. available_random_modules="linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. 
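VERSION_NUMBER above packs mym4_major/minor/micro into one hex constant ("0x%02x%02x%02x", so 1.9.0 becomes 0x010900), which applications can compare against at compile time. A small usage sketch, assuming the GCRYPT_VERSION_NUMBER macro exported by the installed gcrypt.h:

    #include <gcrypt.h>

    /* 0xMMmmuu layout: major, minor, micro, two hex digits each. */
    #if GCRYPT_VERSION_NUMBER >= 0x010900
    #  define HAVE_LIBGCRYPT_1_9 1
    #endif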
LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 900000L, Expose all libc features (__DARWIN_C_FULL).) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AC_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. 
***]]) fi # If not specified otherwise, all available algorithms will be # included. default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AC_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. AC_ARG_ENABLE(pubkey-ciphers, AC_HELP_STRING([--enable-pubkey-ciphers=ciphers], [select the public-key ciphers to include]), [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_pubkey_ciphers=""]) if test "x$enabled_pubkey_ciphers" = "x" \ -o "$enabled_pubkey_ciphers" = "yes" \ -o "$enabled_pubkey_ciphers" = "no"; then enabled_pubkey_ciphers=$default_pubkey_ciphers fi AC_MSG_CHECKING([which public-key ciphers to include]) for cipher in $enabled_pubkey_ciphers; do LIST_MEMBER($cipher, $available_pubkey_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported public-key cipher specified]) fi done AC_MSG_RESULT([$enabled_pubkey_ciphers]) # Implementation of the --enable-digests switch. AC_ARG_ENABLE(digests, AC_HELP_STRING([--enable-digests=digests], [select the message digests to include]), [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_digests=""]) if test "x$enabled_digests" = "x" \ -o "$enabled_digests" = "yes" \ -o "$enabled_digests" = "no"; then enabled_digests=$default_digests fi AC_MSG_CHECKING([which message digests to include]) for digest in $enabled_digests; do LIST_MEMBER($digest, $available_digests) if test "$found" = "0"; then AC_MSG_ERROR([unsupported message digest specified]) fi done AC_MSG_RESULT([$enabled_digests]) # Implementation of the --enable-kdfs switch. AC_ARG_ENABLE(kdfs, AC_HELP_STRING([--enable-kfds=kdfs], [select the KDFs to include]), [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_kdfs=""]) if test "x$enabled_kdfs" = "x" \ -o "$enabled_kdfs" = "yes" \ -o "$enabled_kdfs" = "no"; then enabled_kdfs=$default_kdfs fi AC_MSG_CHECKING([which key derivation functions to include]) for kdf in $enabled_kdfs; do LIST_MEMBER($kdf, $available_kdfs) if test "$found" = "0"; then AC_MSG_ERROR([unsupported key derivation function specified]) fi done AC_MSG_RESULT([$enabled_kdfs]) # Implementation of the --enable-random switch. 
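Each algorithm accepted by --enable-ciphers/--enable-digests/--enable-kdfs ends up as a USE_<algo> define in config.h, and the per-algorithm sources reduce to empty translation units when their define is absent; the SHA-EXT files above gate on USE_SHA1/USE_SHA256 in exactly this way. A minimal sketch of the pattern (the dummy function name is made up):

    #include <config.h>

    #ifdef USE_SHA256
    /* Compiled only when the sha256 digest was selected at configure time. */
    unsigned int
    example_sha256_present (void)
    {
      return 1;
    }
    #endif /* USE_SHA256 */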
AC_ARG_ENABLE(random, AC_HELP_STRING([--enable-random=name], [select which random number generator to use]), [random=`echo $enableval | tr '[A-Z]' '[a-z]'`], []) if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then random=default fi AC_MSG_CHECKING([which random module to use]) if test "$random" != "default" -a "$random" != "auto"; then LIST_MEMBER($random, $available_random_modules) if test "$found" = "0"; then AC_MSG_ERROR([unsupported random module specified]) fi fi AC_MSG_RESULT($random) # Implementation of the --disable-dev-random switch. AC_MSG_CHECKING([whether use of /dev/random is requested]) AC_ARG_ENABLE(dev-random, [ --disable-dev-random disable the use of dev random], try_dev_random=$enableval, try_dev_random=yes) AC_MSG_RESULT($try_dev_random) # Implementation of the --with-egd-socket switch. AC_ARG_WITH(egd-socket, [ --with-egd-socket=NAME Use NAME for the EGD socket)], egd_socket_name="$withval", egd_socket_name="" ) AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name", [Define if you don't want the default EGD socket name. For details see cipher/rndegd.c]) # Implementation of the --enable-random-daemon AC_MSG_CHECKING([whether the experimental random daemon is requested]) AC_ARG_ENABLE([random-daemon], AC_HELP_STRING([--enable-random-daemon], [Build and support the experimental gcryptrnd]), [use_random_daemon=$enableval], [use_random_daemon=no]) AC_MSG_RESULT($use_random_daemon) if test x$use_random_daemon = xyes ; then AC_DEFINE(USE_RANDOM_DAEMON,1, [Define to support the experimental random daemon]) fi AM_CONDITIONAL(USE_RANDOM_DAEMON, test x$use_random_daemon = xyes) # Implementation of --disable-asm. AC_MSG_CHECKING([whether MPI assembler modules are requested]) AC_ARG_ENABLE([asm], AC_HELP_STRING([--disable-asm], [Disable MPI assembler modules]), [try_asm_modules=$enableval], [try_asm_modules=yes]) AC_MSG_RESULT($try_asm_modules) # Implementation of the --enable-m-guard switch. AC_MSG_CHECKING([whether memory guard is requested]) AC_ARG_ENABLE(m-guard, AC_HELP_STRING([--enable-m-guard], [Enable memory guard facility]), [use_m_guard=$enableval], [use_m_guard=no]) AC_MSG_RESULT($use_m_guard) if test "$use_m_guard" = yes ; then AC_DEFINE(M_GUARD,1,[Define to use the (obsolete) malloc guarding feature]) fi # Implementation of the --enable-large-data-tests switch. AC_MSG_CHECKING([whether to run large data tests]) AC_ARG_ENABLE(large-data-tests, AC_HELP_STRING([--enable-large-data-tests], [Enable the real long ruinning large data tests]), large_data_tests=$enableval,large_data_tests=no) AC_MSG_RESULT($large_data_tests) AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests) # Implementation of the --with-capabilities switch. # Check whether we want to use Linux capabilities AC_MSG_CHECKING([whether use of capabilities is requested]) AC_ARG_WITH(capabilities, AC_HELP_STRING([--with-capabilities], [Use linux capabilities [default=no]]), [use_capabilities="$withval"],[use_capabilities=no]) AC_MSG_RESULT($use_capabilities) # Implementation of the --enable-hmac-binary-check. AC_MSG_CHECKING([whether a HMAC binary check is requested]) AC_ARG_ENABLE(hmac-binary-check, AC_HELP_STRING([--enable-hmac-binary-check], [Enable library integrity check]), [use_hmac_binary_check=$enableval], [use_hmac_binary_check=no]) AC_MSG_RESULT($use_hmac_binary_check) if test "$use_hmac_binary_check" = yes ; then AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1, [Define to support an HMAC based integrity check]) fi # Implementation of the --disable-jent-support switch. 
AC_MSG_CHECKING([whether jitter entropy support is requested]) AC_ARG_ENABLE(jent-support, AC_HELP_STRING([--disable-jent-support], [Disable support for the Jitter entropy collector]), jentsupport=$enableval,jentsupport=yes) AC_MSG_RESULT($jentsupport) # Implementation of the --disable-padlock-support switch. AC_MSG_CHECKING([whether padlock support is requested]) AC_ARG_ENABLE(padlock-support, AC_HELP_STRING([--disable-padlock-support], [Disable support for the PadLock Engine of VIA processors]), padlocksupport=$enableval,padlocksupport=yes) AC_MSG_RESULT($padlocksupport) # Implementation of the --disable-aesni-support switch. AC_MSG_CHECKING([whether AESNI support is requested]) AC_ARG_ENABLE(aesni-support, AC_HELP_STRING([--disable-aesni-support], [Disable support for the Intel AES-NI instructions]), aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) # Implementation of the --disable-shaext-support switch. AC_MSG_CHECKING([whether SHAEXT support is requested]) AC_ARG_ENABLE(shaext-support, AC_HELP_STRING([--disable-shaext-support], [Disable support for the Intel SHAEXT instructions]), shaextsupport=$enableval,shaextsupport=yes) AC_MSG_RESULT($shaextsupport) # Implementation of the --disable-pclmul-support switch. AC_MSG_CHECKING([whether PCLMUL support is requested]) AC_ARG_ENABLE(pclmul-support, AC_HELP_STRING([--disable-pclmul-support], [Disable support for the Intel PCLMUL instructions]), pclmulsupport=$enableval,pclmulsupport=yes) AC_MSG_RESULT($pclmulsupport) # Implementation of the --disable-sse41-support switch. AC_MSG_CHECKING([whether SSE4.1 support is requested]) AC_ARG_ENABLE(sse41-support, AC_HELP_STRING([--disable-sse41-support], [Disable support for the Intel SSE4.1 instructions]), sse41support=$enableval,sse41support=yes) AC_MSG_RESULT($sse41support) # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, AC_HELP_STRING([--disable-drng-support], [Disable support for the Intel DRNG (RDRAND instruction)]), drngsupport=$enableval,drngsupport=yes) AC_MSG_RESULT($drngsupport) # Implementation of the --disable-avx-support switch. AC_MSG_CHECKING([whether AVX support is requested]) AC_ARG_ENABLE(avx-support, AC_HELP_STRING([--disable-avx-support], [Disable support for the Intel AVX instructions]), avxsupport=$enableval,avxsupport=yes) AC_MSG_RESULT($avxsupport) # Implementation of the --disable-avx2-support switch. AC_MSG_CHECKING([whether AVX2 support is requested]) AC_ARG_ENABLE(avx2-support, AC_HELP_STRING([--disable-avx2-support], [Disable support for the Intel AVX2 instructions]), avx2support=$enableval,avx2support=yes) AC_MSG_RESULT($avx2support) # Implementation of the --disable-neon-support switch. AC_MSG_CHECKING([whether NEON support is requested]) AC_ARG_ENABLE(neon-support, AC_HELP_STRING([--disable-neon-support], [Disable support for the ARM NEON instructions]), neonsupport=$enableval,neonsupport=yes) AC_MSG_RESULT($neonsupport) # Implementation of the --disable-arm-crypto-support switch. AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested]) AC_ARG_ENABLE(arm-crypto-support, AC_HELP_STRING([--disable-arm-crypto-support], [Disable support for the ARMv8 Crypto Extension instructions]), armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) # Implementation of the --disable-O-flag-munging switch. 
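--disable-shaext-support simply leaves ENABLE_SHAEXT_SUPPORT undefined; the C guard at the top of the SHA-EXT modules above combines it with the assembler capability results, and at run time the accelerated transform is used only if the CPU actually reports the SHA extensions. A rough sketch of that two-level gate; the hardware-feature bit shown is a stand-in, not libgcrypt's real HWF constant:

    #include <config.h>

    #if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && defined(ENABLE_SHAEXT_SUPPORT)
    # define USE_SHAEXT_PATH 1
    #endif

    #define EXAMPLE_HWF_SHAEXT  (1u << 5)   /* stand-in feature bit */

    int
    pick_sha1_transform (unsigned int hwfeatures)   /* hypothetical helper */
    {
    #ifdef USE_SHAEXT_PATH
      if (hwfeatures & EXAMPLE_HWF_SHAEXT)
        return 1;        /* dispatch to the SHA-EXT transform */
    #endif
      (void)hwfeatures;
      return 0;          /* fall back to the generic C transform */
    }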
AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], AC_HELP_STRING([--disable-O-flag-munging], [Disable modification of the cc -O flag]), [enable_o_flag_munging=$enableval], [enable_o_flag_munging=yes]) AC_MSG_RESULT($enable_o_flag_munging) AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes") +# Implementation of the --disable-instrumentation-munging switch. +AC_MSG_CHECKING([whether a instrumentation (-fprofile, -fsanitize) munging is requested]) +AC_ARG_ENABLE([instrumentation-munging], + AC_HELP_STRING([--disable-instrumentation-munging], + [Disable modification of the cc instrumentation options]), + [enable_instrumentation_munging=$enableval], + [enable_instrumentation_munging=yes]) +AC_MSG_RESULT($enable_instrumentation_munging) +AM_CONDITIONAL(ENABLE_INSTRUMENTATION_MUNGING, + test "$enable_instrumentation_munging" = "yes") + # Implementation of the --disable-amd64-as-feature-detection switch. AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection]) AC_ARG_ENABLE(amd64-as-feature-detection, AC_HELP_STRING([--disable-amd64-as-feature-detection], [Disable the auto-detection of AMD64 as(1) features]), amd64_as_feature_detection=$enableval, amd64_as_feature_detection=yes) AC_MSG_RESULT($amd64_as_feature_detection) AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME", [A human readable text with the name of the OS]) # For some systems we know that we have ld_version scripts. # Use it then as default. have_ld_version_script=no case "${host}" in *-*-linux*) have_ld_version_script=yes ;; *-*-gnu*) have_ld_version_script=yes ;; esac AC_ARG_ENABLE([ld-version-script], AC_HELP_STRING([--enable-ld-version-script], [enable/disable use of linker version script. (default is system dependent)]), [have_ld_version_script=$enableval], [ : ] ) AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes") AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM", [defined to the name of the strong random device]) AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM", [defined to the name of the weaker random device]) ############################### #### Checks for libraries. #### ############################### # # gpg-error is required. # AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION") if test "x$GPG_ERROR_LIBS" = "x"; then AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .]) fi AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT, [The default error source for libgcrypt.]) # # Check whether the GNU Pth library is available. We require this # to build the optional gcryptrnd program. # AC_ARG_WITH(pth-prefix, AC_HELP_STRING([--with-pth-prefix=PFX], [prefix where GNU Pth is installed (optional)]), pth_config_prefix="$withval", pth_config_prefix="") if test x$pth_config_prefix != x ; then PTH_CONFIG="$pth_config_prefix/bin/pth-config" fi if test "$use_random_daemon" = "yes"; then AC_PATH_PROG(PTH_CONFIG, pth-config, no) if test "$PTH_CONFIG" = "no"; then AC_MSG_WARN([[ *** *** To build the Libgcrypt's random number daemon *** we need the support of the GNU Portable Threads Library. 
*** Download it from ftp://ftp.gnu.org/gnu/pth/ *** On a Debian GNU/Linux system you might want to try *** apt-get install libpth-dev ***]]) else GNUPG_PTH_VERSION_CHECK([1.3.7]) if test $have_pth = yes; then PTH_CFLAGS=`$PTH_CONFIG --cflags` PTH_LIBS=`$PTH_CONFIG --ldflags` PTH_LIBS="$PTH_LIBS `$PTH_CONFIG --libs --all`" AC_DEFINE(USE_GNU_PTH, 1, [Defined if the GNU Portable Thread Library should be used]) AC_DEFINE(HAVE_PTH, 1, [Defined if the GNU Pth is available]) fi fi fi AC_SUBST(PTH_CFLAGS) AC_SUBST(PTH_LIBS) # # Check whether pthreads is available # if test "$have_w32_system" != yes; then AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes) if test "$have_pthread" = yes; then AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.]) fi fi # Solaris needs -lsocket and -lnsl. Unisys system includes # gethostbyname in libsocket but needs libnsl for socket. AC_SEARCH_LIBS(setsockopt, [socket], , [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])]) AC_SEARCH_LIBS(setsockopt, [nsl]) ################################## #### Checks for header files. #### ################################## AC_HEADER_STDC AC_CHECK_HEADERS(unistd.h sys/select.h sys/msg.h sys/auxv.h) INSERT_SYS_SELECT_H= if test x"$ac_cv_header_sys_select_h" = xyes; then INSERT_SYS_SELECT_H=" include " fi AC_SUBST(INSERT_SYS_SELECT_H) ########################################## #### Checks for typedefs, structures, #### #### and compiler characteristics. #### ########################################## AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_SIGNAL AC_DECL_SYS_SIGLIST AC_TYPE_PID_T GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF) GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF) GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF) GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF) GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF) gl_TYPE_SOCKLEN_T case "${host}" in *-*-mingw32*) # socklen_t may or may not be defined depending on what headers # are included. To be safe we use int as this is the actual type. FALLBACK_SOCKLEN_T="typedef int gcry_socklen_t;" ;; *) if test ".$gl_cv_socklen_t_equiv" = "."; then FALLBACK_SOCKLEN_T="typedef socklen_t gcry_socklen_t;" else FALLBACK_SOCKLEN_T="typedef ${gl_cv_socklen_t_equiv} gcry_socklen_t;" fi esac AC_SUBST(FALLBACK_SOCKLEN_T) # # Check for __builtin_bswap32 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap32, [gcry_cv_have_builtin_bswap32], [gcry_cv_have_builtin_bswap32=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int x = 0; int y = __builtin_bswap32(x); return y;])], [gcry_cv_have_builtin_bswap32=yes])]) if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP32,1, [Defined if compiler has '__builtin_bswap32' intrinsic]) fi # # Check for __builtin_bswap64 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap64, [gcry_cv_have_builtin_bswap64], [gcry_cv_have_builtin_bswap64=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [long long x = 0; long long y = __builtin_bswap64(x); return y;])], [gcry_cv_have_builtin_bswap64=yes])]) if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP64,1, [Defined if compiler has '__builtin_bswap64' intrinsic]) fi # # Check for __builtin_ctz intrinsic. 
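The __builtin_bswap32/__builtin_bswap64 probes let byte-order helpers use the compiler intrinsic (a single bswap on x86) when available and fall back to shifts otherwise. A self-contained sketch of that fallback, not copied from libgcrypt's bithelp.h:

    #include <config.h>
    #include "types.h"            /* u32 */

    static inline u32
    example_bswap32 (u32 x)
    {
    #ifdef HAVE_BUILTIN_BSWAP32
      return __builtin_bswap32 (x);
    #else
      return (x << 24) | ((x & 0xff00U) << 8)
             | ((x >> 8) & 0xff00U) | (x >> 24);
    #endif
    }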
# AC_CACHE_CHECK(for __builtin_ctz, [gcry_cv_have_builtin_ctz], [gcry_cv_have_builtin_ctz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_ctz(x); return y;])], [gcry_cv_have_builtin_ctz=yes])]) if test "$gcry_cv_have_builtin_ctz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZ, 1, [Defined if compiler has '__builtin_ctz' intrinsic]) fi # # Check for __sync_synchronize intrinsic. # AC_CACHE_CHECK(for __sync_synchronize, [gcry_cv_have_sync_synchronize], [gcry_cv_have_sync_synchronize=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [__sync_synchronize(); return 0;])], [gcry_cv_have_sync_synchronize=yes])]) if test "$gcry_cv_have_sync_synchronize" = "yes" ; then AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1, [Defined if compiler has '__sync_synchronize' intrinsic]) fi # # Check for VLA support (variable length arrays). # AC_CACHE_CHECK(whether the variable length arrays are supported, [gcry_cv_have_vla], [gcry_cv_have_vla=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void f1(char *, int); char foo(int i) { char b[(i < 0 ? 0 : i) + 1]; f1(b, sizeof b); return b[0];}]])], [gcry_cv_have_vla=yes])]) if test "$gcry_cv_have_vla" = "yes" ; then AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported]) fi # # Check for ELF visibility support. # AC_CACHE_CHECK(whether the visibility attribute is supported, gcry_cv_visibility_attribute, [gcry_cv_visibility_attribute=no AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo __attribute__ ((visibility ("hidden"))) = 1; int bar __attribute__ ((visibility ("protected"))) = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then gcry_cv_visibility_attribute=yes fi fi fi ]) if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken visibility attribute, gcry_cv_broken_visibility_attribute, [gcry_cv_broken_visibility_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo (int x); int bar (int x) __asm__ ("foo") __attribute__ ((visibility ("hidden"))); int bar (int x) { return x; } ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1; then gcry_cv_broken_visibility_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken alias attribute, gcry_cv_broken_alias_attribute, [gcry_cv_broken_alias_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[extern int foo (int x) __asm ("xyzzy"); int bar (int x) { return x; } extern __typeof (bar) foo __attribute ((weak, alias ("bar"))); extern int dfoo; extern __typeof (dfoo) dfoo __asm ("abccb"); int dfoo = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \ grep 'abccb' conftest.s >/dev/null 2>&1; then gcry_cv_broken_alias_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(if gcc supports -fvisibility=hidden, gcry_cv_gcc_has_f_visibility, [gcry_cv_gcc_has_f_visibility=no _gcc_cflags_save=$CFLAGS CFLAGS="-fvisibility=hidden" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])], gcry_cv_gcc_has_f_visibility=yes) CFLAGS=$_gcc_cflags_save; ]) fi if test "$gcry_cv_visibility_attribute" = "yes" \ && test "$gcry_cv_broken_visibility_attribute" != "yes" \ && test "$gcry_cv_broken_alias_attribute" != "yes" \ && test "$gcry_cv_gcc_has_f_visibility" = 
"yes" then AC_DEFINE(GCRY_USE_VISIBILITY, 1, [Define to use the GNU C visibility attribute.]) CFLAGS="$CFLAGS -fvisibility=hidden" fi # Following attribute tests depend on warnings to cause compile to fail, # so set -Werror temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether the compiler supports the GCC style aligned attribute # AC_CACHE_CHECK([whether the GCC style aligned attribute is supported], [gcry_cv_gcc_attribute_aligned], [gcry_cv_gcc_attribute_aligned=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct { int a; } foo __attribute__ ((aligned (16)));]])], [gcry_cv_gcc_attribute_aligned=yes])]) if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1, [Defined if a GCC style "__attribute__ ((aligned (n))" is supported]) fi # # Check whether the compiler supports the GCC style packed attribute # AC_CACHE_CHECK([whether the GCC style packed attribute is supported], [gcry_cv_gcc_attribute_packed], [gcry_cv_gcc_attribute_packed=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct foolong_s { long b; } __attribute__ ((packed)); struct foo_s { char a; struct foolong_s b; } __attribute__ ((packed)); enum bar { FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))), };]])], [gcry_cv_gcc_attribute_packed=yes])]) if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, [Defined if a GCC style "__attribute__ ((packed))" is supported]) fi # # Check whether the compiler supports the GCC style may_alias attribute # AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported], [gcry_cv_gcc_attribute_may_alias], [gcry_cv_gcc_attribute_may_alias=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[typedef struct foo_s { int a; } __attribute__ ((may_alias)) foo_t;]])], [gcry_cv_gcc_attribute_may_alias=yes])]) if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1, [Defined if a GCC style "__attribute__ ((may_alias))" is supported]) fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # AC_CACHE_CHECK([whether 'asm' assembler keyword is supported], [gcry_cv_have_asm], [gcry_cv_have_asm=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm("":::"memory"); }]])], [gcry_cv_have_asm=yes])]) AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported], [gcry_cv_have___asm__], [gcry_cv_have___asm__=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("":::"memory"); }]])], [gcry_cv_have___asm__=yes])]) if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_DEFINE(asm,__asm__, [Define to supported assembler block keyword, if plain 'asm' was not supported]) fi fi # # Check whether the compiler supports inline assembly memory barrier. 
# if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__ volatile("":::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm volatile("":::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1, [Define if inline asm memory barrier is supported]) fi # # Check whether GCC assembler supports features needed for our ARM # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], [gcry_cv_gcc_arm_platform_as_ok], [gcry_cv_gcc_arm_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( /* Test if assembler supports UAL syntax. */ ".syntax unified\n\t" ".arm\n\t" /* our assembly code is in ARM mode */ /* Following causes error if assembler ignored '.syntax unified'. */ "asmfunc:\n\t" "add %r0, %r0, %r4, ror #12;\n\t" /* Test if '.type' and '.size' are supported. */ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,%function;\n\t" );]])], [gcry_cv_gcc_arm_platform_as_ok=yes])]) if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARM assembly implementations]) fi # # Check whether GCC assembler supports features needed for our ARMv8/Aarch64 # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations], [gcry_cv_gcc_aarch64_platform_as_ok], [gcry_cv_gcc_aarch64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( "asmfunc:\n\t" "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations]) fi # # Check whether GCC assembler supports for CFI directives. # AC_CACHE_CHECK([whether GCC assembler supports for CFI directives], [gcry_cv_gcc_asm_cfi_directives], [gcry_cv_gcc_asm_cfi_directives=no AC_LINK_IFELSE([AC_LANG_PROGRAM( [[__asm__( ".text\n\t" "ac_test:\n\t" ".cfi_startproc\n\t" ".cfi_remember_state\n\t" ".cfi_adjust_cfa_offset 8\n\t" ".cfi_rel_offset 0, 8\n\t" ".cfi_def_cfa_register 1\n\t" ".cfi_register 2, 3\n\t" ".cfi_restore 2\n\t" ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t" ".cfi_restore_state\n\t" ".long 0\n\t" ".cfi_endproc\n\t" );]])], [gcry_cv_gcc_asm_cfi_directives=yes])]) if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_CFI_DIRECTIVES,1, [Defined if underlying assembler supports for CFI directives]) fi # # Check whether underscores in symbols are required. This needs to be # done before setting up the assembler stuff. # GNUPG_SYS_SYMBOL_UNDERSCORE() ################################# #### #### #### Setup assembler stuff. #### #### Define mpi_cpu_arch. 
#### #### #### ################################# AC_ARG_ENABLE(mpi-path, AC_HELP_STRING([--enable-mpi-path=EXTRA_PATH], [prepend EXTRA_PATH to list of CPU specific optimizations]), mpi_extra_path="$enableval",mpi_extra_path="") AC_MSG_CHECKING(architecture and mpi assembler functions) if test -f $srcdir/mpi/config.links ; then . $srcdir/mpi/config.links AC_CONFIG_LINKS("$mpi_ln_list") ac_cv_mpi_sflags="$mpi_sflags" AC_MSG_RESULT($mpi_cpu_arch) else AC_MSG_RESULT(failed) AC_MSG_ERROR([mpi/config.links missing!]) fi MPI_SFLAGS="$ac_cv_mpi_sflags" AC_SUBST(MPI_SFLAGS) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) # Reset non applicable feature flags. if test "$mpi_cpu_arch" != "x86" ; then aesnisupport="n/a" shaextsupport="n/a" pclmulsupport="n/a" sse41support="n/a" avxsupport="n/a" avx2support="n/a" padlocksupport="n/a" jentsupport="n/a" drngsupport="n/a" fi if test "$mpi_cpu_arch" != "arm" ; then if test "$mpi_cpu_arch" != "aarch64" ; then neonsupport="n/a" armcryptosupport="n/a" fi fi ############################################# #### #### #### Platform specific compiler checks. #### #### #### ############################################# # Following tests depend on warnings to cause compile to fail, so set -Werror # temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether compiler supports 'ms_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute], [gcry_cv_gcc_attribute_ms_abi], [gcry_cv_gcc_attribute_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((ms_abi)) proto(int);]])], [gcry_cv_gcc_attribute_ms_abi=yes])]) if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1, [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute]) fi # # Check whether compiler supports 'sysv_abi' function attribute. 
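The ms_abi/sysv_abi checks tie back to the WIN64 handling above: the Microsoft x64 convention makes XMM6-XMM15 callee-saved (hence shaext_prepare/shaext_cleanup saving and restoring xmm6/xmm7), and it also passes arguments in different registers than the SysV AMD64 ABI, so code written for one convention must be declared accordingly. A short sketch of pinning a declaration to sysv_abi; the symbol name is hypothetical:

    #include <config.h>

    #ifdef HAVE_GCC_ATTRIBUTE_SYSV_ABI
    # define SYSV_ABI __attribute__ ((sysv_abi))
    #else
    # define SYSV_ABI
    #endif

    /* Assembly routine written for SysV argument registers (rdi, rsi, rdx);
       the attribute keeps calls correct even when the default ABI is ms_abi,
       e.g. on mingw-w64. */
    void SYSV_ABI example_asm_blocks (void *dst, const void *src,
                                      unsigned int nblks);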
# AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute], [gcry_cv_gcc_attribute_sysv_abi], [gcry_cv_gcc_attribute_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((sysv_abi)) proto(int);]])], [gcry_cv_gcc_attribute_sysv_abi=yes])]) if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1, [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute]) fi # # Check whether default calling convention is 'ms_abi'. # if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], [gcry_cv_gcc_default_abi_is_ms_abi], [gcry_cv_gcc_default_abi_is_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((ms_abi))(*msabi_func)(void); /* warning on SysV abi targets, passes on Windows based targets */ msabi_func = def_func; return msabi_func; }]])], [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, [Defined if default calling convention is 'ms_abi']) fi fi # # Check whether default calling convention is 'sysv_abi'. # if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], [gcry_cv_gcc_default_abi_is_sysv_abi], [gcry_cv_gcc_default_abi_is_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((sysv_abi))(*sysvabi_func)(void); /* warning on MS ABI targets, passes on SysV ABI targets */ sysvabi_func = def_func; return sysvabi_func; }]])], [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, [Defined if default calling convention is 'sysv_abi']) fi fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions], [gcry_cv_gcc_inline_asm_ssse3], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_ssse3="n/a" else gcry_cv_gcc_inline_asm_ssse3=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; void a(void) { __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):); }]])], [gcry_cv_gcc_inline_asm_ssse3=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1, [Defined if inline assembler supports SSSE3 instructions]) fi # # Check whether GCC inline assembler supports PCLMUL instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], [gcry_cv_gcc_inline_asm_pclmul], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_pclmul="n/a" else gcry_cv_gcc_inline_asm_pclmul=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_pclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, [Defined if inline assembler supports PCLMUL instructions]) fi # # Check whether GCC inline assembler supports SHA Extensions instructions. 
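HAVE_GCC_INLINE_ASM_PCLMUL guards cipher-gcm-intel-pclmul.c, whose GHASH relies on pclmulqdq: multiplication of polynomials over GF(2), i.e. long multiplication where partial products are combined with XOR and no carries propagate. A tiny portable illustration on 8-bit operands (the instruction itself performs a 64x64 -> 128-bit carry-less multiply):

    /* Carry-less multiply of two 8-bit polynomials over GF(2); the result
       fits in 16 bits.  Scalar illustration of what pclmulqdq computes. */
    static unsigned int
    clmul8 (unsigned int a, unsigned int b)
    {
      unsigned int r = 0;
      int i;

      for (i = 0; i < 8; i++)
        if (b & (1u << i))
          r ^= a << i;       /* XOR instead of add: no carry propagation */
      return r;
    }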
# AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions], [gcry_cv_gcc_inline_asm_shaext], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_shaext="n/a" else gcry_cv_gcc_inline_asm_shaext=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_shaext=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1, [Defined if inline assembler supports SHA Extensions instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]])], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); }]])], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else gcry_cv_gcc_inline_asm_bmi2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[unsigned int a(unsigned int x, unsigned int y) { unsigned int tmp1, tmp2; asm ("rorxl %2, %1, %0" : "=r" (tmp1) : "rm0" (x), "J" (32 - ((23) & 31))); asm ("andnl %2, %1, %0" : "=r" (tmp2) : "r0" (x), "rm" (y)); return tmp1 + tmp2; }]])], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; 
then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to CPPFLAGS and try check again. # _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # # Check whether GCC assembler supports features needed for our amd64 # implementations # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], [gcry_cv_gcc_amd64_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_amd64_platform_as_ok="n/a" else gcry_cv_gcc_amd64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. */ "xorl \$(123456789/12345678), %ebp;\n\t" );]])], [gcry_cv_gcc_amd64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" );]])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".intel_syntax noprefix\n\t" "pxor xmm1, xmm7;\n\t" /* Intel syntax implementation also use GAS macros, so check * for them here. 
*/ "VAL_A = xmm4\n\t" "VAL_B = xmm2\n\t" ".macro SET_VAL_A p1\n\t" " VAL_A = \\\\p1 \n\t" ".endm\n\t" ".macro SET_VAL_B p1\n\t" " VAL_B = \\\\p1 \n\t" ".endm\n\t" "vmovdqa VAL_A, VAL_B;\n\t" "SET_VAL_A eax\n\t" "SET_VAL_B ebp\n\t" "add VAL_A, VAL_B;\n\t" "add VAL_B, 0b10101;\n\t" );]])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. */ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" "vld1.64 {%q0-%q1}, [%r0]!;\n\t" "vrev64.8 %q0, %q3;\n\t" "vadd.u64 %q0, %q1;\n\t" "vadd.s64 %d3, %d2, %d3;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], 
[gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd+crypto\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension instructions]) fi ####################################### #### Checks for library functions. #### ####################################### AC_FUNC_VPRINTF # We have replacements for these in src/missing-string.c AC_CHECK_FUNCS(stpcpy strcasecmp) # We have replacements for these in src/g10lib.h AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise) # Other checks AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4) AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog) AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile getauxval elf_aux_info) AC_CHECK_FUNCS(explicit_bzero explicit_memset getentropy) GNUPG_CHECK_MLOCK # # Replacement functions. # AC_REPLACE_FUNCS([getpid clock]) # # Check whether it is necessary to link against libdl. # DL_LIBS="" if test "$use_hmac_binary_check" = yes ; then _gcry_save_libs="$LIBS" LIBS="" AC_SEARCH_LIBS(dlopen, c dl,,,) DL_LIBS=$LIBS LIBS="$_gcry_save_libs" LIBGCRYPT_CONFIG_LIBS="${LIBGCRYPT_CONFIG_LIBS} ${DL_LIBS}" fi AC_SUBST(DL_LIBS) # # Check whether we can use Linux capabilities as requested. # if test "$use_capabilities" = "yes" ; then use_capabilities=no AC_CHECK_HEADERS(sys/capability.h) if test "$ac_cv_header_sys_capability_h" = "yes" ; then AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1) if test "$ac_cv_lib_cap_cap_init" = "yes"; then AC_DEFINE(USE_CAPABILITIES,1, [define if capabilities should be used]) LIBS="$LIBS -lcap" use_capabilities=yes fi fi if test "$use_capabilities" = "no" ; then AC_MSG_WARN([[ *** *** The use of capabilities on this system is not possible. 
*** You need a recent Linux kernel and some patches: *** fcaps-2.2.9-990610.patch (kernel patch for 2.2.9) *** fcap-module-990613.tar.gz (kernel module) *** libcap-1.92.tar.gz (user mode library and utilities) *** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN *** set (filesystems menu). Be warned: This code is *really* ALPHA. ***]]) fi fi # Check whether a random device is available. if test "$try_dev_random" = yes ; then AC_CACHE_CHECK(for random device, ac_cv_have_dev_random, [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi]) if test "$ac_cv_have_dev_random" = yes; then AC_DEFINE(HAVE_DEV_RANDOM,1, [defined if the system supports a random device] ) fi else AC_MSG_CHECKING(for random device) ac_cv_have_dev_random=no AC_MSG_RESULT(has been disabled) fi # Figure out the random modules for this configuration. if test "$random" = "default"; then # Select default value. if test "$ac_cv_have_dev_random" = yes; then # Try Linuxish random device. random_modules="linux" else case "${host}" in *-*-mingw32ce*) # WindowsCE random device. random_modules="w32ce" ;; *-*-mingw32*|*-*-cygwin*) # Windows random device. random_modules="w32" ;; *) # Build everything, allow to select at runtime. random_modules="$auto_random_modules" ;; esac fi else if test "$random" = "auto"; then # Build everything, allow to select at runtime. random_modules="$auto_random_modules" else random_modules="$random" fi fi # # Other defines # if test mym4_isgit = "yes"; then AC_DEFINE(IS_DEVELOPMENT_VERSION,1, [Defined if this is not a regular release]) fi AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes) # This is handy for debugging so the compiler doesn't rearrange # things and eliminate variables. AC_ARG_ENABLE(optimization, AC_HELP_STRING([--disable-optimization], [disable compiler optimization]), [if test $enableval = no ; then CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'` fi]) AC_MSG_NOTICE([checking for cc features]) # CFLAGS mangling when using gcc. if test "$GCC" = yes; then AC_MSG_CHECKING([if gcc supports -fno-delete-null-pointer-checks]) _gcc_cflags_save=$CFLAGS CFLAGS="-fno-delete-null-pointer-checks" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -fno-delete-null-pointer-checks" fi CFLAGS="$CFLAGS -Wall" if test "$USE_MAINTAINER_MODE" = "yes"; then CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes" CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security" # If -Wno-missing-field-initializers is supported we can enable a # a bunch of really useful warnings. 
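# (Illustrative aside on the --disable-optimization option a few lines
# above: it strips any -O<digit> flag from the user's CFLAGS.  The doubled
# brackets in 's/-O[[0-9]]//' are m4 quoting and reach the generated
# configure script as a single character class, so with a hypothetical
# starting value:
#
#   CFLAGS="-g -O2"   becomes   CFLAGS="-g "
#
# while spellings such as -Os or -Ofast would pass through unchanged.)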
AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wno-missing-field-initializers" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wdeclaration-after-statement" CFLAGS="$CFLAGS -Wno-missing-field-initializers" CFLAGS="$CFLAGS -Wno-sign-compare" fi AC_MSG_CHECKING([if gcc supports -Wpointer-arith]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wpointer-arith" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -Wpointer-arith" fi fi fi # Check whether as(1) supports a noexecstack feature. This test # includes an override option. CL_AS_NOEXECSTACK AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION) AC_SUBST(LIBGCRYPT_CONFIG_LIBS) AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS) AC_SUBST(LIBGCRYPT_CONFIG_HOST) AC_SUBST(LIBGCRYPT_THREAD_MODULES) AC_CONFIG_COMMANDS([gcrypt-conf],[[ chmod +x src/libgcrypt-config ]],[[ prefix=$prefix exec_prefix=$exec_prefix libdir=$libdir datadir=$datadir DATADIRNAME=$DATADIRNAME ]]) ##################### #### Conclusion. #### ##################### # Check that requested feature can actually be used and define # ENABLE_foo_SUPPORT macros. if test x"$aesnisupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then aesnisupport="no (unsupported by compiler)" fi fi if test x"$shaextsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then shaextsupport="no (unsupported by compiler)" fi fi if test x"$pclmulsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then pclmulsupport="no (unsupported by compiler)" fi fi if test x"$sse41support" = xyes ; then if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then sse41support="no (unsupported by compiler)" fi fi if test x"$avxsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then avxsupport="no (unsupported by compiler)" fi fi if test x"$avx2support" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then avx2support="no (unsupported by compiler)" fi fi if test x"$neonsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then neonsupport="no (unsupported by compiler)" fi fi fi if test x"$armcryptosupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then armcryptosupport="no (unsupported by compiler)" fi fi fi if test x"$aesnisupport" = xyes ; then AC_DEFINE(ENABLE_AESNI_SUPPORT, 1, [Enable support for Intel AES-NI instructions.]) fi if test x"$shaextsupport" = xyes ; then AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1, [Enable support for Intel SHAEXT instructions.]) fi if test x"$pclmulsupport" = xyes ; then AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1, [Enable support for Intel PCLMUL instructions.]) fi if test x"$sse41support" = xyes ; then AC_DEFINE(ENABLE_SSE41_SUPPORT, 1, [Enable support for Intel SSE4.1 instructions.]) fi if test x"$avxsupport" = xyes ; then AC_DEFINE(ENABLE_AVX_SUPPORT,1, [Enable support for Intel AVX instructions.]) fi if test x"$avx2support" = xyes ; then AC_DEFINE(ENABLE_AVX2_SUPPORT,1, [Enable support for Intel AVX2 instructions.]) fi if test x"$neonsupport" = xyes ; then AC_DEFINE(ENABLE_NEON_SUPPORT,1, [Enable
support for ARM NEON instructions.]) fi if test x"$armcryptosupport" = xyes ; then AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) fi if test x"$padlocksupport" = xyes ; then AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1, [Enable support for the PadLock engine.]) fi if test x"$drngsupport" = xyes ; then AC_DEFINE(ENABLE_DRNG_SUPPORT, 1, [Enable support for Intel DRNG (RDRAND instruction).]) fi # Define conditional sources and config.h symbols depending on the # selected ciphers, pubkey-ciphers, digests, kdfs, and random modules. LIST_MEMBER(arcfour, $enabled_ciphers) if test "$found" = "1"; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo" AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour-amd64.lo" ;; esac fi LIST_MEMBER(blowfish, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo" AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-arm.lo" ;; esac fi LIST_MEMBER(cast5, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo" AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-arm.lo" ;; esac fi LIST_MEMBER(des, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo" AC_DEFINE(USE_DES, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS des-amd64.lo" ;; esac fi LIST_MEMBER(aes, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo" AC_DEFINE(USE_AES, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-amd64.lo" # Build with the SSSE3 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64-asm.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-arm.lo" # Build with the ARMv8/AArch32 CE implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aarch64.lo" # Build with the ARMv8/AArch64 CE implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the AES-NI implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aesni.lo" # Build with the Padlock implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-padlock.lo" ;; esac fi LIST_MEMBER(twofish, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo" AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be 
included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-amd64.lo" if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-avx2-amd64.lo" fi ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-aarch64.lo" ;; esac fi LIST_MEMBER(serpent, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo" AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the SSE2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-sse2-amd64.lo" ;; esac if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-avx2-amd64.lo" fi if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-armv7-neon.lo" fi fi LIST_MEMBER(rfc2268, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo" AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included]) fi LIST_MEMBER(seed, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo" AC_DEFINE(USE_SEED, 1, [Defined if this module should be included]) fi LIST_MEMBER(camellia, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo" AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included]) case "${host}" in arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aarch64.lo" ;; esac if test x"$avxsupport" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx-amd64.lo" fi fi if test x"$avx2support" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx2-amd64.lo" fi fi fi LIST_MEMBER(idea, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo" AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included]) fi LIST_MEMBER(salsa20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo" AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-amd64.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-armv7-neon.lo" fi fi LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo" AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included]) fi LIST_MEMBER(chacha20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo" AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-amd64-ssse3.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-amd64-avx2.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-aarch64.lo" 
;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-armv7-neon.lo" fi fi LIST_MEMBER(dsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo" AC_DEFINE(USE_DSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(rsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo" AC_DEFINE(USE_RSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(elgamal, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo" AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included]) fi LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi LIST_MEMBER(crc, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo" AC_DEFINE(USE_CRC, 1, [Defined if this module should be included]) case "${host}" in i?86-*-* | x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-armv8-aarch64-ce.lo" ;; esac fi LIST_MEMBER(gostr3411-94, $enabled_digests) if test "$found" = "1" ; then # GOST R 34.11-94 internally uses GOST 28147-89 LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo" AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included]) fi fi LIST_MEMBER(stribog, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo" AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included]) fi LIST_MEMBER(md2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo" AC_DEFINE(USE_MD2, 1, [Defined if this module should be included]) fi LIST_MEMBER(md4, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo" AC_DEFINE(USE_MD4, 1, [Defined if this module should be included]) fi LIST_MEMBER(md5, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo" AC_DEFINE(USE_MD5, 1, [Defined if this module should be included]) fi LIST_MEMBER(rmd160, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo" AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included]) fi LIST_MEMBER(sha256, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo" AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-intel-shaext.lo" ;; esac fi LIST_MEMBER(sha512, $enabled_digests) if 
test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo" AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-arm.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-armv7-neon.lo" fi fi LIST_MEMBER(sha3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo" AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation : ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak-armv7-neon.lo" fi fi LIST_MEMBER(tiger, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo" AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included]) fi LIST_MEMBER(whirlpool, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo" AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool-sse2-amd64.lo" ;; esac fi LIST_MEMBER(blake2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo" AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2b-amd64-avx2.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2s-amd64-avx.lo" ;; esac fi # SHA-1 needs to be included always for example because it is used by # random-csprng.c. 
GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo" AC_DEFINE(USE_SHA1, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-bmi2-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv7-neon.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the SHAEXT implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-intel-shaext.lo" ;; esac LIST_MEMBER(sm3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo" AC_DEFINE(USE_SM3, 1, [Defined if this module should be included]) fi LIST_MEMBER(scrypt, $enabled_kdfs) if test "$found" = "1" ; then GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo" AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included]) fi LIST_MEMBER(linux, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndlinux.lo" AC_DEFINE(USE_RNDLINUX, 1, [Defined if the /dev/random RNG should be used.]) fi LIST_MEMBER(unix, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo" AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.]) fi LIST_MEMBER(egd, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo" AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.]) fi LIST_MEMBER(w32, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo" AC_DEFINE(USE_RNDW32, 1, [Defined if the Windows specific RNG should be used.]) fi LIST_MEMBER(w32ce, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo" AC_DEFINE(USE_RNDW32CE, 1, [Defined if the WindowsCE specific RNG should be used.]) fi AC_SUBST([GCRYPT_CIPHERS]) AC_SUBST([GCRYPT_PUBKEY_CIPHERS]) AC_SUBST([GCRYPT_DIGESTS]) AC_SUBST([GCRYPT_KDFS]) AC_SUBST([GCRYPT_RANDOM]) AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers) AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers) AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests) # For printing the configuration we need a colon separated list of # algorithm names. tmp=`echo "$enabled_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp", [List of available cipher algorithms]) tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp", [List of available public key cipher algorithms]) tmp=`echo "$enabled_digests" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp", [List of available digest algorithms]) tmp=`echo "$enabled_kdfs" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp", [List of available KDF algorithms]) # # Define conditional sources depending on the used hardware platform. # Note that all possible modules must also be listed in # src/Makefile.am (EXTRA_libgcrypt_la_SOURCES). 
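# (Illustrative aside: the case statement below maps $mpi_cpu_arch to at
# most one hardware-feature detection module, for example
#
#   mpi_cpu_arch=x86      ->  GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo"
#   mpi_cpu_arch=aarch64  ->  GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
#   mpi_cpu_arch=ppc      ->  GCRYPT_HWF_MODULES stays empty
#
# and, as the note above says, any such module must also be listed in
# src/Makefile.am (EXTRA_libgcrypt_la_SOURCES).)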
# GCRYPT_HWF_MODULES= case "$mpi_cpu_arch" in x86) AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo" ;; alpha) AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms]) ;; sparc) AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms]) ;; mips) AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms]) ;; m68k) AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms]) ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; aarch64) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms]) GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo" ;; esac AC_SUBST([GCRYPT_HWF_MODULES]) # # Option to disable building of doc file # build_doc=yes AC_ARG_ENABLE([doc], AC_HELP_STRING([--disable-doc], [do not build the documentation]), build_doc=$enableval, build_doc=yes) AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno]) # # Provide information about the build. # BUILD_REVISION="mym4_revision" AC_SUBST(BUILD_REVISION) AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION", [GIT commit id revision used to build this package]) changequote(,)dnl BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'` changequote([,])dnl BUILD_VERSION="${BUILD_VERSION}mym4_revision_dec" BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,` AC_SUBST(BUILD_VERSION) AC_SUBST(BUILD_FILEVERSION) AC_ARG_ENABLE([build-timestamp], AC_HELP_STRING([--enable-build-timestamp], [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]), [if test "$enableval" = "yes"; then BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date` else BUILD_TIMESTAMP="$enableval" fi], [BUILD_TIMESTAMP=""]) AC_SUBST(BUILD_TIMESTAMP) AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP", [The time this package was configured for a build]) # And create the files. 
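# (Illustrative aside on the build-version mangling above, using a
# hypothetical release: with PACKAGE_VERSION=1.8.4 the sed call keeps the
# leading digits and dots and appends a trailing dot, so BUILD_VERSION is
# "1.8.4." before mym4_revision_dec is appended; BUILD_FILEVERSION then
# turns the dots into commas, e.g. 1.8.4.4711 -> 1,8,4,4711, the
# comma-separated form used for Windows resource version fields.)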
AC_CONFIG_FILES([ Makefile m4/Makefile compat/Makefile mpi/Makefile cipher/Makefile random/Makefile doc/Makefile src/Makefile src/gcrypt.h src/libgcrypt-config src/libgcrypt.pc src/versioninfo.rc tests/Makefile ]) AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g]) AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf]) AC_OUTPUT detection_module="${GCRYPT_HWF_MODULES%.lo}" test -n "$detection_module" || detection_module="none" # Give some feedback GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:]) GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)]) GCRY_MSG_SHOW([Hardware detection module:],[$detection_module]) GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers]) GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests]) GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs]) GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers]) GCRY_MSG_SHOW([Random number generator: ],[$random]) GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport]) GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities]) GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport]) GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport]) GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport]) GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport]) GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support]) GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport]) GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then cat <