diff --git a/cipher/Makefile.am b/cipher/Makefile.am index bbfab4c8..08baa7c4 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,131 +1,130 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. 
AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c cipher-cfb.c cipher-ofb.c cipher-ctr.c cipher-aeswrap.c \ cipher-ccm.c cipher-cmac.c cipher-gcm.c cipher-gcm-intel-pclmul.c \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ cipher-poly1305.c cipher-ocb.c cipher-xts.c \ cipher-selftest.c cipher-selftest.h \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ hmac-tests.c \ bithelp.h \ bufhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \ chacha20-armv7-neon.S \ crc.c \ crc-intel-pclmul.c \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ -poly1305-sse2-amd64.S poly1305-avx2-amd64.S poly1305-armv7-neon.S \ rijndael.c rijndael-internal.h rijndael-tables.h rijndael-aesni.c \ rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \ rijndael-aarch64.S \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S 
serpent-avx2-amd64.S serpent-armv7-neon.S \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \ sha512-armv7-neon.S sha512-arm.S \ sm3.c \ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ blake2.c gost28147.lo: gost-sb.h gost-sb.h: gost-s-box ./gost-s-box $@ gost-s-box: gost-s-box.c $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. tiger.o: $(srcdir)/tiger.c `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` diff --git a/cipher/poly1305-armv7-neon.S b/cipher/poly1305-armv7-neon.S deleted file mode 100644 index 13cb4a5d..00000000 --- a/cipher/poly1305-armv7-neon.S +++ /dev/null @@ -1,744 +0,0 @@ -/* poly1305-armv7-neon.S - ARMv7/NEON implementation of Poly1305 - * - * Copyright (C) 2014 Jussi Kivilinna - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. 
- * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see . - */ - -/* - * Based on public domain implementation by Andrew Moon at - * https://github.com/floodyberry/poly1305-opt - */ - -#include - -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ - defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_NEON) - -.syntax unified -.fpu neon -.arm - -#ifdef __PIC__ -# define GET_DATA_POINTER(reg, name, rtmp) \ - ldr reg, 1f; \ - ldr rtmp, 2f; \ - b 3f; \ - 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ - 2: .word name(GOT); \ - 3: add reg, pc, reg; \ - ldr reg, [reg, rtmp]; -#else -# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name -#endif - -#define UNALIGNED_LDMIA2(ptr, l0, l1) \ - tst ptr, #3; \ - beq 1f; \ - vpush {d0}; \ - vld1.32 {d0}, [ptr]!; \ - vmov l0, s0; \ - vmov l1, s1; \ - vpop {d0}; \ - b 2f; \ - 1: ldmia ptr!, {l0-l1}; \ - 2: ; - -#define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) \ - tst ptr, #3; \ - beq 1f; \ - vpush {d0-d1}; \ - vld1.32 {d0-d1}, [ptr]!; \ - vmov l0, s0; \ - vmov l1, s1; \ - vmov l2, s2; \ - vmov l3, s3; \ - vpop {d0-d1}; \ - b 2f; \ - 1: ldmia ptr!, {l0-l3}; \ - 2: ; - -.text - -.p2align 2 -.Lpoly1305_init_constants_neon: -.long 0x3ffff03 -.long 0x3ffc0ff -.long 0x3f03fff -.long 0x00fffff - -.globl _gcry_poly1305_armv7_neon_init_ext -.type _gcry_poly1305_armv7_neon_init_ext,%function; -_gcry_poly1305_armv7_neon_init_ext: -.Lpoly1305_init_ext_neon_local: - stmfd sp!, {r4-r11, lr} - sub sp, sp, #32 - mov r14, r2 - and r2, r2, r2 - moveq r14, #-1 - UNALIGNED_LDMIA4(r1, r2, r3, r4, r5) - GET_DATA_POINTER(r7,.Lpoly1305_init_constants_neon,r8) - mov r6, r2 - mov r8, r2, lsr #26 - mov r9, r3, lsr 
#20 - mov r10, r4, lsr #14 - mov r11, r5, lsr #8 - orr r8, r8, r3, lsl #6 - orr r9, r9, r4, lsl #12 - orr r10, r10, r5, lsl #18 - ldmia r7, {r2-r5} - and r2, r2, r8 - and r3, r3, r9 - and r4, r4, r10 - and r5, r5, r11 - and r6, r6, 0x3ffffff - stmia r0!, {r2-r6} - eor r8, r8, r8 - str r8, [sp, #24] -.Lpoly1305_init_ext_neon_squareloop: - ldr r8, [sp, #24] - mov r12, #16 - cmp r8, #2 - beq .Lpoly1305_init_ext_neon_donesquaring - cmp r8, #1 - moveq r12, #64 - cmp r14, r12 - bls .Lpoly1305_init_ext_neon_donesquaring - add r8, #1 - str r8, [sp, #24] - mov r6, r6, lsl #1 - mov r2, r2, lsl #1 - umull r7, r8, r3, r3 - umull r9, r10, r6, r4 - umlal r7, r8, r6, r5 - umlal r9, r10, r2, r3 - add r11, r5, r5, lsl #2 - umlal r7, r8, r2, r4 - umlal r9, r10, r5, r11 - str r7, [sp, #16] - str r8, [sp, #20] - mov r2, r2, lsr #1 - mov r5, r5, lsl #1 - str r9, [sp, #8] - str r10, [sp, #12] - umull r7, r8, r2, r2 - umull r9, r10, r6, r2 - add r11, r3, r3, lsl #2 - add r12, r4, r4, lsl #2 - umlal r7, r8, r6, r3 - umlal r9, r10, r5, r11 - umlal r7, r8, r5, r12 - umlal r9, r10, r4, r12 - mov r6, r6, lsr #1 - mov r3, r3, lsl #1 - add r11, r2, r2, lsl #2 - str r7, [sp, #0] - str r8, [sp, #4] - umull r7, r8, r6, r6 - umlal r7, r8, r3, r12 - umlal r7, r8, r5, r11 - and r6, r7, 0x3ffffff - mov r11, r7, lsr #26 - orr r11, r11, r8, lsl #6 - ldr r7, [sp, #0] - ldr r8, [sp, #4] - adds r9, r9, r11 - adc r10, r10, #0 - and r2, r9, 0x3ffffff - mov r11, r9, lsr #26 - orr r11, r11, r10, lsl #6 - ldr r9, [sp, #8] - ldr r10, [sp, #12] - adds r7, r7, r11 - adc r8, r8, #0 - and r3, r7, 0x3ffffff - mov r11, r7, lsr #26 - orr r11, r11, r8, lsl #6 - ldr r7, [sp, #16] - ldr r8, [sp, #20] - adds r9, r9, r11 - adc r10, r10, #0 - and r4, r9, 0x3ffffff - mov r11, r9, lsr #26 - orr r11, r11, r10, lsl #6 - adds r7, r7, r11 - adc r8, r8, #0 - and r5, r7, 0x3ffffff - mov r11, r7, lsr #26 - orr r11, r11, r8, lsl #6 - add r11, r11, r11, lsl #2 - add r6, r6, r11 - mov r11, r6, lsr #26 - and r6, r6, 0x3ffffff - add r2, 
r2, r11 - stmia r0!, {r2-r6} - b .Lpoly1305_init_ext_neon_squareloop -.Lpoly1305_init_ext_neon_donesquaring: - mov r2, #2 - ldr r14, [sp, #24] - sub r14, r2, r14 - mov r3, r14, lsl #4 - add r3, r3, r14, lsl #2 - add r0, r0, r3 - eor r2, r2, r2 - eor r3, r3, r3 - eor r4, r4, r4 - eor r5, r5, r5 - eor r6, r6, r6 - stmia r0!, {r2-r6} - stmia r0!, {r2-r6} - UNALIGNED_LDMIA4(r1, r2, r3, r4, r5) - stmia r0, {r2-r6} - add sp, sp, #32 - ldmfd sp!, {r4-r11, lr} - mov r0, #(9*4+32) - bx lr -.ltorg -.size _gcry_poly1305_armv7_neon_init_ext,.-_gcry_poly1305_armv7_neon_init_ext; - -.globl _gcry_poly1305_armv7_neon_blocks -.type _gcry_poly1305_armv7_neon_blocks,%function; -_gcry_poly1305_armv7_neon_blocks: -.Lpoly1305_blocks_neon_local: - vmov.i32 q0, #0xffffffff - vmov.i32 d4, #1 - vsubw.u32 q0, q0, d4 - vstmdb sp!, {q4,q5,q6,q7} - stmfd sp!, {r4-r11, lr} - mov r8, sp - and sp, sp, #~63 - sub sp, sp, #192 - str r0, [sp, #108] - str r1, [sp, #112] - str r2, [sp, #116] - str r8, [sp, #120] - mov r3, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - ldr r8, [r2, #116] - veor d15, d15, d15 - vorr.i32 d15, #(1 << 24) - tst r8, #2 - beq .Lpoly1305_blocks_neon_skip_shift8 - vshr.u64 d15, #32 -.Lpoly1305_blocks_neon_skip_shift8: - tst r8, #4 - beq .Lpoly1305_blocks_neon_skip_shift16 - veor d15, d15, d15 -.Lpoly1305_blocks_neon_skip_shift16: - vst1.64 d15, [sp, :64] - tst r8, #1 - bne .Lpoly1305_blocks_neon_started - vld1.64 {q0-q1}, [r0]! 
- vswp d1, d2 - vmovn.i64 d21, q0 - vshrn.i64 d22, q0, #26 - vshrn.u64 d24, q1, #14 - vext.8 d0, d0, d2, #4 - vext.8 d1, d1, d3, #4 - vshr.u64 q1, q1, #32 - vshrn.i64 d23, q0, #20 - vshrn.u64 d25, q1, #8 - vand.i32 d21, #0x03ffffff - vand.i32 q11, #0x03ffffff - vand.i32 q12, #0x03ffffff - orr r8, r8, #1 - sub r1, r1, #32 - str r8, [r2, #116] - vorr d25, d25, d15 - b .Lpoly1305_blocks_neon_setupr20 -.Lpoly1305_blocks_neon_started: - add r9, r2, #60 - vldm r9, {d21-d25} -.Lpoly1305_blocks_neon_setupr20: - vmov.i32 d0, #5 - tst r8, #(8|16) - beq .Lpoly1305_blocks_neon_setupr20_simple - tst r8, #(8) - beq .Lpoly1305_blocks_neon_setupr20_r_1 - mov r9, r2 - add r10, r2, #20 - vld1.64 {q9}, [r9]! - vld1.64 {q8}, [r10]! - vld1.64 {d2}, [r9] - vld1.64 {d20}, [r10] - b .Lpoly1305_blocks_neon_setupr20_hard -.Lpoly1305_blocks_neon_setupr20_r_1: - mov r9, r2 - vmov.i32 d2, #1 - vld1.64 {q8}, [r9]! - veor q9, q9, q9 - vshr.u64 d2, d2, #32 - vld1.64 {d20}, [r9] -.Lpoly1305_blocks_neon_setupr20_hard: - vzip.i32 q8, q9 - vzip.i32 d20, d2 - b .Lpoly1305_blocks_neon_setups20 -.Lpoly1305_blocks_neon_setupr20_simple: - add r9, r2, #20 - vld1.64 {d2-d4}, [r9] - vdup.32 d16, d2[0] - vdup.32 d17, d2[1] - vdup.32 d18, d3[0] - vdup.32 d19, d3[1] - vdup.32 d20, d4[0] -.Lpoly1305_blocks_neon_setups20: - vmul.i32 q13, q8, d0[0] - vmov.i64 q15, 0x00000000ffffffff - vmul.i32 q14, q9, d0[0] - vshr.u64 q15, q15, #6 - cmp r1, #64 - blo .Lpoly1305_blocks_neon_try32 - add r9, sp, #16 - add r10, r2, #40 - add r11, sp, #64 - str r1, [sp, #116] - vld1.64 {d10-d12}, [r10] - vmov d14, d12 - vmul.i32 q6, q5, d0[0] -.Lpoly1305_blocks_neon_mainloop: - UNALIGNED_LDMIA4(r0, r2, r3, r4, r5) - vmull.u32 q0, d25, d12[0] - mov r7, r2, lsr #26 - vmlal.u32 q0, d24, d12[1] - mov r8, r3, lsr #20 - ldr r6, [sp, #0] - vmlal.u32 q0, d23, d13[0] - mov r9, r4, lsr #14 - vmlal.u32 q0, d22, d13[1] - orr r6, r6, r5, lsr #8 - vmlal.u32 q0, d21, d14[0] - orr r3, r7, r3, lsl #6 - vmull.u32 q1, d25, d12[1] - orr r4, r8, r4, lsl 
#12 - orr r5, r9, r5, lsl #18 - vmlal.u32 q1, d24, d13[0] - UNALIGNED_LDMIA4(r0, r7, r8, r9, r10) - vmlal.u32 q1, d23, d13[1] - mov r1, r7, lsr #26 - vmlal.u32 q1, d22, d14[0] - ldr r11, [sp, #4] - mov r12, r8, lsr #20 - vmlal.u32 q1, d21, d10[0] - mov r14, r9, lsr #14 - vmull.u32 q2, d25, d13[0] - orr r11, r11, r10, lsr #8 - orr r8, r1, r8, lsl #6 - vmlal.u32 q2, d24, d13[1] - orr r9, r12, r9, lsl #12 - vmlal.u32 q2, d23, d14[0] - orr r10, r14, r10, lsl #18 - vmlal.u32 q2, d22, d10[0] - mov r12, r3 - and r2, r2, #0x3ffffff - vmlal.u32 q2, d21, d10[1] - mov r14, r5 - vmull.u32 q3, d25, d13[1] - and r3, r7, #0x3ffffff - vmlal.u32 q3, d24, d14[0] - and r5, r8, #0x3ffffff - vmlal.u32 q3, d23, d10[0] - and r7, r9, #0x3ffffff - vmlal.u32 q3, d22, d10[1] - and r8, r14, #0x3ffffff - vmlal.u32 q3, d21, d11[0] - and r9, r10, #0x3ffffff - add r14, sp, #128 - vmull.u32 q4, d25, d14[0] - mov r10, r6 - vmlal.u32 q4, d24, d10[0] - and r6, r4, #0x3ffffff - vmlal.u32 q4, d23, d10[1] - and r4, r12, #0x3ffffff - vmlal.u32 q4, d22, d11[0] - stm r14, {r2-r11} - vmlal.u32 q4, d21, d11[1] - vld1.64 {d21-d24}, [r14, :256]! 
- vld1.64 {d25}, [r14, :64] - UNALIGNED_LDMIA4(r0, r2, r3, r4, r5) - vmlal.u32 q0, d25, d26 - mov r7, r2, lsr #26 - vmlal.u32 q0, d24, d27 - ldr r6, [sp, #0] - mov r8, r3, lsr #20 - vmlal.u32 q0, d23, d28 - mov r9, r4, lsr #14 - vmlal.u32 q0, d22, d29 - orr r6, r6, r5, lsr #8 - vmlal.u32 q0, d21, d20 - orr r3, r7, r3, lsl #6 - vmlal.u32 q1, d25, d27 - orr r4, r8, r4, lsl #12 - orr r5, r9, r5, lsl #18 - vmlal.u32 q1, d24, d28 - UNALIGNED_LDMIA4(r0, r7, r8, r9, r10) - vmlal.u32 q1, d23, d29 - mov r1, r7, lsr #26 - vmlal.u32 q1, d22, d20 - ldr r11, [sp, #4] - mov r12, r8, lsr #20 - vmlal.u32 q1, d21, d16 - mov r14, r9, lsr #14 - vmlal.u32 q2, d25, d28 - orr r11, r11, r10, lsr #8 - orr r8, r1, r8, lsl #6 - orr r9, r12, r9, lsl #12 - vmlal.u32 q2, d24, d29 - orr r10, r14, r10, lsl #18 - and r2, r2, #0x3ffffff - mov r12, r3 - vmlal.u32 q2, d23, d20 - mov r14, r5 - vmlal.u32 q2, d22, d16 - and r3, r7, #0x3ffffff - vmlal.u32 q2, d21, d17 - and r5, r8, #0x3ffffff - vmlal.u32 q3, d25, d29 - and r7, r9, #0x3ffffff - vmlal.u32 q3, d24, d20 - and r8, r14, #0x3ffffff - vmlal.u32 q3, d23, d16 - and r9, r10, #0x3ffffff - vmlal.u32 q3, d22, d17 - add r14, sp, #128 - vmlal.u32 q3, d21, d18 - mov r10, r6 - vmlal.u32 q4, d25, d20 - vmlal.u32 q4, d24, d16 - and r6, r4, #0x3ffffff - vmlal.u32 q4, d23, d17 - and r4, r12, #0x3ffffff - vmlal.u32 q4, d22, d18 - stm r14, {r2-r11} - vmlal.u32 q4, d21, d19 - vld1.64 {d21-d24}, [r14, :256]! 
- vld1.64 {d25}, [r14, :64] - vaddw.u32 q0, q0, d21 - vaddw.u32 q1, q1, d22 - vaddw.u32 q2, q2, d23 - vaddw.u32 q3, q3, d24 - vaddw.u32 q4, q4, d25 - vshr.u64 q11, q0, #26 - vand q0, q0, q15 - vadd.i64 q1, q1, q11 - vshr.u64 q12, q3, #26 - vand q3, q3, q15 - vadd.i64 q4, q4, q12 - vshr.u64 q11, q1, #26 - vand q1, q1, q15 - vadd.i64 q2, q2, q11 - vshr.u64 q12, q4, #26 - vand q4, q4, q15 - vadd.i64 q0, q0, q12 - vshl.i64 q12, q12, #2 - ldr r1, [sp, #116] - vadd.i64 q0, q0, q12 - vshr.u64 q11, q2, #26 - vand q2, q2, q15 - vadd.i64 q3, q3, q11 - sub r1, #64 - vshr.u64 q12, q0, #26 - vand q0, q0, q15 - vadd.i64 q1, q1, q12 - cmp r1, #64 - vshr.u64 q11, q3, #26 - vand q3, q3, q15 - vadd.i64 q4, q4, q11 - vmovn.i64 d21, q0 - str r1, [sp, #116] - vmovn.i64 d22, q1 - vmovn.i64 d23, q2 - vmovn.i64 d24, q3 - vmovn.i64 d25, q4 - bhs .Lpoly1305_blocks_neon_mainloop -.Lpoly1305_blocks_neon_try32: - cmp r1, #32 - blo .Lpoly1305_blocks_neon_done - tst r0, r0 - bne .Lpoly1305_blocks_loadm32 - veor q0, q0, q0 - veor q1, q1, q1 - veor q2, q2, q2 - veor q3, q3, q3 - veor q4, q4, q4 - b .Lpoly1305_blocks_continue32 -.Lpoly1305_blocks_loadm32: - vld1.64 {q0-q1}, [r0]! 
- veor q4, q4, q4 - vswp d1, d2 - veor q3, q3, q3 - vtrn.32 q0, q4 - vtrn.32 q1, q3 - vshl.i64 q2, q1, #12 - vshl.i64 q3, q3, #18 - vshl.i64 q1, q4, #6 - vmovl.u32 q4, d15 -.Lpoly1305_blocks_continue32: - vmlal.u32 q0, d25, d26 - vmlal.u32 q0, d24, d27 - vmlal.u32 q0, d23, d28 - vmlal.u32 q0, d22, d29 - vmlal.u32 q0, d21, d20 - vmlal.u32 q1, d25, d27 - vmlal.u32 q1, d24, d28 - vmlal.u32 q1, d23, d29 - vmlal.u32 q1, d22, d20 - vmlal.u32 q1, d21, d16 - vmlal.u32 q2, d25, d28 - vmlal.u32 q2, d24, d29 - vmlal.u32 q2, d23, d20 - vmlal.u32 q2, d22, d16 - vmlal.u32 q2, d21, d17 - vmlal.u32 q3, d25, d29 - vmlal.u32 q3, d24, d20 - vmlal.u32 q3, d23, d16 - vmlal.u32 q3, d22, d17 - vmlal.u32 q3, d21, d18 - vmlal.u32 q4, d25, d20 - vmlal.u32 q4, d24, d16 - vmlal.u32 q4, d23, d17 - vmlal.u32 q4, d22, d18 - vmlal.u32 q4, d21, d19 - vshr.u64 q11, q0, #26 - vand q0, q0, q15 - vadd.i64 q1, q1, q11 - vshr.u64 q12, q3, #26 - vand q3, q3, q15 - vadd.i64 q4, q4, q12 - vshr.u64 q11, q1, #26 - vand q1, q1, q15 - vadd.i64 q2, q2, q11 - vshr.u64 q12, q4, #26 - vand q4, q4, q15 - vadd.i64 q0, q0, q12 - vshl.i64 q12, q12, #2 - vadd.i64 q0, q0, q12 - vshr.u64 q11, q2, #26 - vand q2, q2, q15 - vadd.i64 q3, q3, q11 - vshr.u64 q12, q0, #26 - vand q0, q0, q15 - vadd.i64 q1, q1, q12 - vshr.u64 q11, q3, #26 - vand q3, q3, q15 - vadd.i64 q4, q4, q11 - vmovn.i64 d21, q0 - vmovn.i64 d22, q1 - vmovn.i64 d23, q2 - vmovn.i64 d24, q3 - vmovn.i64 d25, q4 -.Lpoly1305_blocks_neon_done: - tst r0, r0 - beq .Lpoly1305_blocks_neon_final - ldr r2, [sp, #108] - add r2, r2, #60 - vst1.64 {d21}, [r2]! 
- vst1.64 {d22-d25}, [r2] - b .Lpoly1305_blocks_neon_leave -.Lpoly1305_blocks_neon_final: - vadd.u32 d10, d0, d1 - vadd.u32 d13, d2, d3 - vadd.u32 d11, d4, d5 - ldr r5, [sp, #108] - vadd.u32 d14, d6, d7 - vadd.u32 d12, d8, d9 - vtrn.32 d10, d13 - vtrn.32 d11, d14 - vst1.64 {d10-d12}, [sp] - ldm sp, {r0-r4} - mov r12, r0, lsr #26 - and r0, r0, #0x3ffffff - add r1, r1, r12 - mov r12, r1, lsr #26 - and r1, r1, #0x3ffffff - add r2, r2, r12 - mov r12, r2, lsr #26 - and r2, r2, #0x3ffffff - add r3, r3, r12 - mov r12, r3, lsr #26 - and r3, r3, #0x3ffffff - add r4, r4, r12 - mov r12, r4, lsr #26 - and r4, r4, #0x3ffffff - add r12, r12, r12, lsl #2 - add r0, r0, r12 - mov r12, r0, lsr #26 - and r0, r0, #0x3ffffff - add r1, r1, r12 - mov r12, r1, lsr #26 - and r1, r1, #0x3ffffff - add r2, r2, r12 - mov r12, r2, lsr #26 - and r2, r2, #0x3ffffff - add r3, r3, r12 - mov r12, r3, lsr #26 - and r3, r3, #0x3ffffff - add r4, r4, r12 - mov r12, r4, lsr #26 - and r4, r4, #0x3ffffff - add r12, r12, r12, lsl #2 - add r0, r0, r12 - mov r12, r0, lsr #26 - and r0, r0, #0x3ffffff - add r1, r1, r12 - add r6, r0, #5 - mov r12, r6, lsr #26 - and r6, r6, #0x3ffffff - add r7, r1, r12 - mov r12, r7, lsr #26 - and r7, r7, #0x3ffffff - add r10, r2, r12 - mov r12, r10, lsr #26 - and r10, r10, #0x3ffffff - add r11, r3, r12 - mov r12, #-(1 << 26) - add r12, r12, r11, lsr #26 - and r11, r11, #0x3ffffff - add r14, r4, r12 - mov r12, r14, lsr #31 - sub r12, #1 - and r6, r6, r12 - and r7, r7, r12 - and r10, r10, r12 - and r11, r11, r12 - and r14, r14, r12 - mvn r12, r12 - and r0, r0, r12 - and r1, r1, r12 - and r2, r2, r12 - and r3, r3, r12 - and r4, r4, r12 - orr r0, r0, r6 - orr r1, r1, r7 - orr r2, r2, r10 - orr r3, r3, r11 - orr r4, r4, r14 - orr r0, r0, r1, lsl #26 - lsr r1, r1, #6 - orr r1, r1, r2, lsl #20 - lsr r2, r2, #12 - orr r2, r2, r3, lsl #14 - lsr r3, r3, #18 - orr r3, r3, r4, lsl #8 - add r5, r5, #60 - stm r5, {r0-r3} -.Lpoly1305_blocks_neon_leave: - sub r0, sp, #8 - ldr sp, [sp, #120] - 
ldmfd sp!, {r4-r11, lr} - vldm sp!, {q4-q7} - sub r0, sp, r0 - bx lr -.size _gcry_poly1305_armv7_neon_blocks,.-_gcry_poly1305_armv7_neon_blocks; - -.globl _gcry_poly1305_armv7_neon_finish_ext -.type _gcry_poly1305_armv7_neon_finish_ext,%function; -_gcry_poly1305_armv7_neon_finish_ext: -.Lpoly1305_finish_ext_neon_local: - stmfd sp!, {r4-r11, lr} - sub sp, sp, #32 - mov r5, r0 - mov r6, r1 - mov r7, r2 - mov r8, r3 - ands r7, r7, r7 - beq .Lpoly1305_finish_ext_neon_noremaining - mov r9, sp - veor q0, q0, q0 - veor q1, q1, q1 - vst1.64 {q0-q1}, [sp] - tst r7, #16 - beq .Lpoly1305_finish_ext_neon_skip16 - vld1.u64 {q0}, [r1]! - vst1.64 {q0}, [r9]! -.Lpoly1305_finish_ext_neon_skip16: - tst r7, #8 - beq .Lpoly1305_finish_ext_neon_skip8 - UNALIGNED_LDMIA2(r1, r10, r11) - stmia r9!, {r10-r11} -.Lpoly1305_finish_ext_neon_skip8: - tst r7, #4 - beq .Lpoly1305_finish_ext_neon_skip4 - ldr r10, [r1], #4 - str r10, [r9], #4 -.Lpoly1305_finish_ext_neon_skip4: - tst r7, #2 - beq .Lpoly1305_finish_ext_neon_skip2 - ldrh r10, [r1], #2 - strh r10, [r9], #2 -.Lpoly1305_finish_ext_neon_skip2: - tst r7, #1 - beq .Lpoly1305_finish_ext_neon_skip1 - ldrb r10, [r1], #1 - strb r10, [r9], #1 -.Lpoly1305_finish_ext_neon_skip1: - cmp r7, #16 - beq .Lpoly1305_finish_ext_neon_skipfinalbit - mov r10, #1 - strb r10, [r9] -.Lpoly1305_finish_ext_neon_skipfinalbit: - ldr r10, [r5, #116] - orrhs r10, #2 - orrlo r10, #4 - str r10, [r5, #116] - mov r0, r5 - mov r1, sp - mov r2, #32 - bl .Lpoly1305_blocks_neon_local -.Lpoly1305_finish_ext_neon_noremaining: - ldr r10, [r5, #116] - tst r10, #1 - beq .Lpoly1305_finish_ext_neon_notstarted - cmp r7, #0 - beq .Lpoly1305_finish_ext_neon_user2r - cmp r7, #16 - bls .Lpoly1305_finish_ext_neon_user1 -.Lpoly1305_finish_ext_neon_user2r: - orr r10, r10, #8 - b .Lpoly1305_finish_ext_neon_finalblock -.Lpoly1305_finish_ext_neon_user1: - orr r10, r10, #16 -.Lpoly1305_finish_ext_neon_finalblock: - str r10, [r5, #116] - mov r0, r5 - eor r1, r1, r1 - mov r2, #32 - bl 
.Lpoly1305_blocks_neon_local -.Lpoly1305_finish_ext_neon_notstarted: - add r0, r5, #60 - add r9, r5, #100 - ldm r0, {r0-r3} - ldm r9, {r9-r12} - adds r0, r0, r9 - adcs r1, r1, r10 - adcs r2, r2, r11 - adcs r3, r3, r12 - stm r8, {r0-r3} - veor q0, q0, q0 - veor q1, q1, q1 - veor q2, q2, q2 - veor q3, q3, q3 - vstmia r5!, {q0-q3} - vstm r5, {q0-q3} - add sp, sp, #32 - ldmfd sp!, {r4-r11, lr} - mov r0, #(9*4+32) - bx lr -.size _gcry_poly1305_armv7_neon_finish_ext,.-_gcry_poly1305_armv7_neon_finish_ext; - -#endif diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S deleted file mode 100644 index 9362a5ae..00000000 --- a/cipher/poly1305-avx2-amd64.S +++ /dev/null @@ -1,962 +0,0 @@ -/* poly1305-avx2-amd64.S - AMD64/AVX2 implementation of Poly1305 - * - * Copyright (C) 2014 Jussi Kivilinna - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see . - */ - -/* - * Based on public domain implementation by Andrew Moon at - * https://github.com/floodyberry/poly1305-opt - */ - -#include - -#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ - defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ - defined(ENABLE_AVX2_SUPPORT) - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif - - -.text - - -.align 8 -.globl _gcry_poly1305_amd64_avx2_init_ext -ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;) -_gcry_poly1305_amd64_avx2_init_ext: -.Lpoly1305_init_ext_avx2_local: - xor %edx, %edx - vzeroupper - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - pushq %rbx - movq %rdx, %rcx - vpxor %ymm0, %ymm0, %ymm0 - movq $-1, %r8 - testq %rcx, %rcx - vmovdqu %ymm0, (%rdi) - vmovdqu %ymm0, 32(%rdi) - vmovdqu %ymm0, 64(%rdi) - vmovdqu %ymm0, 96(%rdi) - vmovdqu %ymm0, 128(%rdi) - movq 8(%rsi), %r9 - cmove %r8, %rcx - movq $0xffc0fffffff, %r8 - movq %r9, %r13 - movq (%rsi), %r10 - andq %r10, %r8 - shrq $44, %r10 - movq %r8, %r14 - shlq $20, %r13 - orq %r13, %r10 - movq $0xfffffc0ffff, %r13 - shrq $24, %r9 - andq %r13, %r10 - movq $0xffffffc0f, %r13 - andq %r13, %r9 - movl %r8d, %r13d - andl $67108863, %r13d - movl %r13d, 164(%rdi) - movq %r10, %r13 - shrq $26, %r14 - shlq $18, %r13 - orq %r13, %r14 - movq %r10, %r13 - shrq $8, %r13 - andl $67108863, %r14d - andl $67108863, %r13d - movl %r14d, 172(%rdi) - movq %r10, %r14 - movl %r13d, 180(%rdi) - movq %r9, %r13 - shrq $34, %r14 - shlq $10, %r13 - orq %r13, %r14 - movq %r9, %r13 - shrq $16, %r13 - andl $67108863, %r14d - movl %r14d, 188(%rdi) - movl %r13d, 196(%rdi) - cmpq $16, %rcx - jbe .Lpoly1305_init_ext_avx2_continue - lea (%r9,%r9,4), %r11 - shlq $2, %r11 - lea (%r10,%r10), %rax - mulq %r11 - movq %rax, %r13 - movq %r8, %rax - movq %rdx, %r14 - mulq %r8 - addq %rax, %r13 - lea (%r8,%r8), %rax - movq %r13, %r12 - adcq %rdx, %r14 - mulq %r10 - shlq $20, %r14 - movq %rax, %r15 - shrq $44, %r12 - movq %r11, %rax - orq %r12, %r14 - movq %rdx, %r12 - mulq %r9 - addq %rax, %r15 - movq %r8, %rax - adcq %rdx, %r12 - addq %r15, %r14 - lea (%r9,%r9), %r15 - movq %r14, %rbx - adcq $0, %r12 - mulq %r15 - shlq $20, %r12 - movq %rdx, %r11 - shrq $44, %rbx - orq %rbx, %r12 - movq %rax, %rbx - movq %r10, %rax - mulq %r10 - addq %rax, %rbx - adcq %rdx, %r11 - addq %rbx, %r12 - movq $0xfffffffffff, 
%rbx - movq %r12, %r15 - adcq $0, %r11 - andq %rbx, %r13 - shlq $22, %r11 - andq %rbx, %r14 - shrq $42, %r15 - orq %r15, %r11 - lea (%r11,%r11,4), %r11 - addq %r11, %r13 - movq %rbx, %r11 - andq %r13, %r11 - shrq $44, %r13 - movq %r11, %r15 - addq %r13, %r14 - movq $0x3ffffffffff, %r13 - andq %r14, %rbx - andq %r13, %r12 - movq %rbx, %r13 - shrq $26, %r15 - shlq $18, %r13 - orq %r13, %r15 - movq %rbx, %r13 - shrq $44, %r14 - shrq $8, %r13 - addq %r14, %r12 - movl %r11d, %r14d - andl $67108863, %r15d - andl $67108863, %r14d - andl $67108863, %r13d - movl %r14d, 204(%rdi) - movq %rbx, %r14 - movl %r13d, 220(%rdi) - movq %r12, %r13 - shrq $34, %r14 - shlq $10, %r13 - orq %r13, %r14 - movq %r12, %r13 - shrq $16, %r13 - andl $67108863, %r14d - movl %r15d, 212(%rdi) - movl %r14d, 228(%rdi) - movl %r13d, 236(%rdi) - cmpq $32, %rcx - jbe .Lpoly1305_init_ext_avx2_continue - movq %r9, %rax - lea (%rbx,%rbx,4), %r14 - shlq $2, %r14 - mulq %r14 - movq %rdi, -32(%rsp) - lea (%r12,%r12,4), %rdi - shlq $2, %rdi - movq %rax, %r14 - movq %r10, %rax - movq %rdx, %r15 - mulq %rdi - movq %rax, %r13 - movq %r11, %rax - movq %rcx, -16(%rsp) - movq %rdx, %rcx - mulq %r8 - addq %rax, %r13 - movq %rdi, %rax - movq %rsi, -24(%rsp) - adcq %rdx, %rcx - addq %r13, %r14 - adcq %rcx, %r15 - movq %r14, %rcx - mulq %r9 - shlq $20, %r15 - movq %rax, %r13 - shrq $44, %rcx - movq %r11, %rax - orq %rcx, %r15 - movq %rdx, %rcx - mulq %r10 - movq %rax, %rsi - movq %rbx, %rax - movq %rdx, %rdi - mulq %r8 - addq %rax, %rsi - movq %r11, %rax - adcq %rdx, %rdi - addq %rsi, %r13 - adcq %rdi, %rcx - addq %r13, %r15 - movq %r15, %rdi - adcq $0, %rcx - mulq %r9 - shlq $20, %rcx - movq %rdx, %rsi - shrq $44, %rdi - orq %rdi, %rcx - movq %rax, %rdi - movq %rbx, %rax - mulq %r10 - movq %rax, %r9 - movq %r8, %rax - movq %rdx, %r10 - movq $0xfffffffffff, %r8 - mulq %r12 - addq %rax, %r9 - adcq %rdx, %r10 - andq %r8, %r14 - addq %r9, %rdi - adcq %r10, %rsi - andq %r8, %r15 - addq %rdi, %rcx - movq $0x3ffffffffff, 
%rdi - movq %rcx, %r10 - adcq $0, %rsi - andq %rdi, %rcx - shlq $22, %rsi - shrq $42, %r10 - orq %r10, %rsi - movq -32(%rsp), %rdi - lea (%rsi,%rsi,4), %r9 - movq %r8, %rsi - addq %r9, %r14 - andq %r14, %rsi - shrq $44, %r14 - addq %r14, %r15 - andq %r15, %r8 - shrq $44, %r15 - movq %r8, %r14 - addq %r15, %rcx - movl %esi, %r15d - movq %rcx, %r10 - movq %r8, %r9 - shrq $26, %rsi - andl $67108863, %r15d - shlq $18, %r14 - shrq $34, %r8 - orq %r14, %rsi - shlq $10, %r10 - shrq $8, %r9 - orq %r10, %r8 - shrq $16, %rcx - andl $67108863, %esi - movl %esi, 252(%rdi) - andl $67108863, %r9d - movl %ecx, 276(%rdi) - andl $67108863, %r8d - movl %r15d, 244(%rdi) - movl %r9d, 260(%rdi) - movl %r8d, 268(%rdi) - movq -16(%rsp), %rcx - movq -24(%rsp), %rsi -.Lpoly1305_init_ext_avx2_continue: - movl 16(%rsi), %r8d - movl %r8d, 284(%rdi) - movl 20(%rsi), %r9d - movl %r9d, 292(%rdi) - movl 24(%rsi), %r10d - movl %r10d, 300(%rdi) - movl 28(%rsi), %esi - movl %esi, 308(%rdi) - cmpq $48, %rcx - jbe .Lpoly1305_init_ext_avx2_done - lea (%r12,%r12,4), %r9 - shlq $2, %r9 - lea (%rbx,%rbx), %rax - mulq %r9 - movq %rax, %rsi - movq %r11, %rax - movq %rdx, %r8 - mulq %r11 - addq %rax, %rsi - lea (%r11,%r11), %rax - movq %rsi, %r10 - adcq %rdx, %r8 - mulq %rbx - movq %rax, %r13 - movq %r12, %rax - movq %rdx, %rcx - addq %r12, %r12 - mulq %r9 - addq %rax, %r13 - movq %r11, %rax - movq $0xfffffffffff, %r9 - adcq %rdx, %rcx - andq %r9, %rsi - mulq %r12 - shlq $20, %r8 - movq %rax, %r11 - shrq $44, %r10 - movq %rbx, %rax - orq %r10, %r8 - movq %rdx, %r12 - mulq %rbx - addq %r13, %r8 - movq %r8, %r14 - adcq $0, %rcx - andq %r9, %r8 - addq %rax, %r11 - adcq %rdx, %r12 - shlq $20, %rcx - shrq $44, %r14 - orq %r14, %rcx - addq %r11, %rcx - movq %rcx, %rbx - adcq $0, %r12 - shlq $22, %r12 - shrq $42, %rbx - orq %rbx, %r12 - movq %r9, %rbx - lea (%r12,%r12,4), %r15 - addq %r15, %rsi - andq %rsi, %rbx - shrq $44, %rsi - movl %ebx, %r11d - addq %rsi, %r8 - movq $0x3ffffffffff, %rsi - andq %r8, %r9 - andq 
%rsi, %rcx - shrq $44, %r8 - movq %r9, %rax - addq %r8, %rcx - movq %r9, %r8 - movq %rcx, %r10 - andl $67108863, %r11d - shrq $26, %rbx - shlq $18, %r8 - shrq $34, %r9 - orq %r8, %rbx - shlq $10, %r10 - shrq $8, %rax - orq %r10, %r9 - shrq $16, %rcx - andl $67108863, %ebx - andl $67108863, %eax - andl $67108863, %r9d - movl %r11d, 184(%rdi) - movl %r11d, 176(%rdi) - movl %r11d, 168(%rdi) - movl %r11d, 160(%rdi) - movl %ebx, 216(%rdi) - movl %ebx, 208(%rdi) - movl %ebx, 200(%rdi) - movl %ebx, 192(%rdi) - movl %eax, 248(%rdi) - movl %eax, 240(%rdi) - movl %eax, 232(%rdi) - movl %eax, 224(%rdi) - movl %r9d, 280(%rdi) - movl %r9d, 272(%rdi) - movl %r9d, 264(%rdi) - movl %r9d, 256(%rdi) - movl %ecx, 312(%rdi) - movl %ecx, 304(%rdi) - movl %ecx, 296(%rdi) - movl %ecx, 288(%rdi) -.Lpoly1305_init_ext_avx2_done: - movq $0, 320(%rdi) - vzeroall - popq %rbx - popq %r15 - popq %r14 - popq %r13 - popq %r12 - ret -ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;) - - -.align 8 -.globl _gcry_poly1305_amd64_avx2_blocks -ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;) -_gcry_poly1305_amd64_avx2_blocks: -.Lpoly1305_blocks_avx2_local: - vzeroupper - pushq %rbp - movq %rsp, %rbp - pushq %rbx - andq $-64, %rsp - subq $200, %rsp - movl $((1<<26)-1), %r8d - movl $(5), %r9d - movl $((1<<24)), %r10d - vmovd %r8d, %xmm0 - vmovd %r9d, %xmm8 - vmovd %r10d, %xmm7 - vpbroadcastq %xmm0, %ymm0 - vpbroadcastq %xmm8, %ymm8 - vpbroadcastq %xmm7, %ymm7 - vmovdqa %ymm7, 168(%rsp) - movq 320(%rdi), %rax - testb $60, %al - je .Lpoly1305_blocks_avx2_9 - vmovdqa 168(%rsp), %ymm7 - vpsrldq $8, %ymm7, %ymm1 - vmovdqa %ymm1, 168(%rsp) - testb $4, %al - je .Lpoly1305_blocks_avx2_10 - vpermq $192, %ymm1, %ymm7 - vmovdqa %ymm7, 168(%rsp) -.Lpoly1305_blocks_avx2_10: - testb $8, %al - je .Lpoly1305_blocks_avx2_11 - vpermq $240, 168(%rsp), %ymm7 - vmovdqa %ymm7, 168(%rsp) -.Lpoly1305_blocks_avx2_11: - testb $16, %al - je .Lpoly1305_blocks_avx2_12 - vpermq $252, 168(%rsp), 
%ymm6 - vmovdqa %ymm6, 168(%rsp) -.Lpoly1305_blocks_avx2_12: - testb $32, %al - je .Lpoly1305_blocks_avx2_9 - vpxor %xmm6, %xmm6, %xmm6 - vmovdqa %ymm6, 168(%rsp) -.Lpoly1305_blocks_avx2_9: - testb $1, %al - jne .Lpoly1305_blocks_avx2_13 - vmovdqu (%rsi), %ymm3 - vmovdqu 32(%rsi), %ymm1 - vpunpcklqdq %ymm1, %ymm3, %ymm2 - vpunpckhqdq %ymm1, %ymm3, %ymm1 - vpermq $216, %ymm2, %ymm2 - vpermq $216, %ymm1, %ymm1 - vpand %ymm2, %ymm0, %ymm5 - vpsrlq $26, %ymm2, %ymm4 - vpand %ymm4, %ymm0, %ymm4 - vpsllq $12, %ymm1, %ymm3 - vpsrlq $52, %ymm2, %ymm2 - vpor %ymm3, %ymm2, %ymm2 - vpand %ymm2, %ymm0, %ymm3 - vpsrlq $26, %ymm2, %ymm2 - vpand %ymm2, %ymm0, %ymm2 - vpsrlq $40, %ymm1, %ymm1 - vpor 168(%rsp), %ymm1, %ymm1 - addq $64, %rsi - subq $64, %rdx - orq $1, 320(%rdi) - jmp .Lpoly1305_blocks_avx2_14 -.Lpoly1305_blocks_avx2_13: - vmovdqa (%rdi), %ymm5 - vmovdqa 32(%rdi), %ymm4 - vmovdqa 64(%rdi), %ymm3 - vmovdqa 96(%rdi), %ymm2 - vmovdqa 128(%rdi), %ymm1 -.Lpoly1305_blocks_avx2_14: - cmpq $63, %rdx - jbe .Lpoly1305_blocks_avx2_15 - vmovdqa 160(%rdi), %ymm6 - vmovdqa %ymm8, 136(%rsp) - vmovdqa 192(%rdi), %ymm7 - vpmuludq %ymm8, %ymm7, %ymm11 - vmovdqa %ymm11, 104(%rsp) - vmovdqa 224(%rdi), %ymm11 - vmovdqa %ymm11, 72(%rsp) - vpmuludq %ymm11, %ymm8, %ymm11 - vmovdqa %ymm11, 40(%rsp) - vmovdqa 256(%rdi), %ymm11 - vmovdqa %ymm11, 8(%rsp) - vpmuludq %ymm11, %ymm8, %ymm11 - vmovdqa %ymm11, -24(%rsp) - vmovdqa 288(%rdi), %ymm13 - vmovdqa %ymm13, -56(%rsp) - vpmuludq %ymm13, %ymm8, %ymm13 - vmovdqa %ymm13, -88(%rsp) -.Lpoly1305_blocks_avx2_16: - vpmuludq 104(%rsp), %ymm1, %ymm14 - vmovdqa 40(%rsp), %ymm13 - vpmuludq %ymm13, %ymm2, %ymm8 - vpmuludq %ymm13, %ymm1, %ymm13 - vmovdqa -24(%rsp), %ymm9 - vpmuludq %ymm9, %ymm2, %ymm10 - vpmuludq %ymm9, %ymm1, %ymm11 - vpaddq %ymm8, %ymm14, %ymm14 - vpmuludq %ymm9, %ymm3, %ymm8 - vmovdqa -88(%rsp), %ymm12 - vpmuludq %ymm12, %ymm1, %ymm9 - vpaddq %ymm10, %ymm13, %ymm13 - vpmuludq %ymm12, %ymm4, %ymm15 - vmovdqa %ymm12, %ymm10 - vpmuludq 
%ymm12, %ymm3, %ymm12 - vpaddq %ymm8, %ymm14, %ymm14 - vpmuludq %ymm10, %ymm2, %ymm10 - vpmuludq %ymm6, %ymm2, %ymm8 - vpaddq %ymm15, %ymm14, %ymm14 - vpmuludq %ymm6, %ymm1, %ymm1 - vpaddq %ymm12, %ymm13, %ymm13 - vpmuludq %ymm6, %ymm5, %ymm15 - vpaddq %ymm10, %ymm11, %ymm11 - vpmuludq %ymm6, %ymm4, %ymm12 - vpaddq %ymm8, %ymm9, %ymm9 - vpmuludq %ymm6, %ymm3, %ymm10 - vpmuludq %ymm7, %ymm3, %ymm8 - vpaddq %ymm15, %ymm14, %ymm14 - vpmuludq %ymm7, %ymm2, %ymm2 - vpaddq %ymm12, %ymm13, %ymm12 - vpmuludq %ymm7, %ymm5, %ymm15 - vpaddq %ymm10, %ymm11, %ymm10 - vpmuludq %ymm7, %ymm4, %ymm13 - vpaddq %ymm8, %ymm9, %ymm8 - vmovdqa 72(%rsp), %ymm9 - vpmuludq %ymm9, %ymm4, %ymm11 - vpaddq %ymm2, %ymm1, %ymm1 - vpmuludq %ymm9, %ymm3, %ymm3 - vpaddq %ymm15, %ymm12, %ymm12 - vpmuludq %ymm9, %ymm5, %ymm15 - vpaddq %ymm13, %ymm10, %ymm10 - vmovdqa 8(%rsp), %ymm2 - vpmuludq %ymm2, %ymm5, %ymm9 - vpaddq %ymm11, %ymm8, %ymm8 - vpmuludq %ymm2, %ymm4, %ymm4 - vpaddq %ymm3, %ymm1, %ymm1 - vpmuludq -56(%rsp), %ymm5, %ymm5 - vpaddq %ymm15, %ymm10, %ymm10 - vpaddq %ymm9, %ymm8, %ymm8 - vpaddq %ymm4, %ymm1, %ymm1 - vpaddq %ymm5, %ymm1, %ymm5 - vmovdqu (%rsi), %ymm3 - vmovdqu 32(%rsi), %ymm2 - vperm2i128 $32, %ymm2, %ymm3, %ymm1 - vperm2i128 $49, %ymm2, %ymm3, %ymm2 - vpunpckldq %ymm2, %ymm1, %ymm15 - vpunpckhdq %ymm2, %ymm1, %ymm2 - vpxor %xmm4, %xmm4, %xmm4 - vpunpckldq %ymm4, %ymm15, %ymm1 - vpunpckhdq %ymm4, %ymm15, %ymm15 - vpunpckldq %ymm4, %ymm2, %ymm3 - vpunpckhdq %ymm4, %ymm2, %ymm2 - vpsllq $6, %ymm15, %ymm15 - vpsllq $12, %ymm3, %ymm3 - vpsllq $18, %ymm2, %ymm2 - vpaddq %ymm1, %ymm14, %ymm14 - vpaddq %ymm15, %ymm12, %ymm12 - vpaddq %ymm3, %ymm10, %ymm10 - vpaddq %ymm2, %ymm8, %ymm8 - vpaddq 168(%rsp), %ymm5, %ymm5 - addq $64, %rsi - vpsrlq $26, %ymm14, %ymm4 - vpsrlq $26, %ymm8, %ymm2 - vpand %ymm0, %ymm14, %ymm14 - vpand %ymm0, %ymm8, %ymm8 - vpaddq %ymm4, %ymm12, %ymm12 - vpaddq %ymm2, %ymm5, %ymm5 - vpsrlq $26, %ymm12, %ymm3 - vpsrlq $26, %ymm5, %ymm9 - vpand %ymm0, %ymm12, 
%ymm12 - vpand %ymm0, %ymm5, %ymm11 - vpaddq %ymm3, %ymm10, %ymm3 - vpmuludq 136(%rsp), %ymm9, %ymm9 - vpaddq %ymm9, %ymm14, %ymm14 - vpsrlq $26, %ymm3, %ymm2 - vpsrlq $26, %ymm14, %ymm4 - vpand %ymm0, %ymm3, %ymm3 - vpand %ymm0, %ymm14, %ymm5 - vpaddq %ymm2, %ymm8, %ymm2 - vpaddq %ymm4, %ymm12, %ymm4 - vpsrlq $26, %ymm2, %ymm1 - vpand %ymm0, %ymm2, %ymm2 - vpaddq %ymm1, %ymm11, %ymm1 - subq $64, %rdx - cmpq $63, %rdx - ja .Lpoly1305_blocks_avx2_16 -.Lpoly1305_blocks_avx2_15: - testb $64, 320(%rdi) - jne .Lpoly1305_blocks_avx2_17 - vmovdqa %ymm5, (%rdi) - vmovdqa %ymm4, 32(%rdi) - vmovdqa %ymm3, 64(%rdi) - vmovdqa %ymm2, 96(%rdi) - vmovdqa %ymm1, 128(%rdi) - jmp .Lpoly1305_blocks_avx2_8 -.Lpoly1305_blocks_avx2_17: - vpermq $245, %ymm5, %ymm0 - vpaddq %ymm0, %ymm5, %ymm5 - vpermq $245, %ymm4, %ymm0 - vpaddq %ymm0, %ymm4, %ymm4 - vpermq $245, %ymm3, %ymm0 - vpaddq %ymm0, %ymm3, %ymm3 - vpermq $245, %ymm2, %ymm0 - vpaddq %ymm0, %ymm2, %ymm2 - vpermq $245, %ymm1, %ymm0 - vpaddq %ymm0, %ymm1, %ymm1 - vpermq $170, %ymm5, %ymm0 - vpaddq %ymm0, %ymm5, %ymm5 - vpermq $170, %ymm4, %ymm0 - vpaddq %ymm0, %ymm4, %ymm4 - vpermq $170, %ymm3, %ymm0 - vpaddq %ymm0, %ymm3, %ymm3 - vpermq $170, %ymm2, %ymm0 - vpaddq %ymm0, %ymm2, %ymm2 - vpermq $170, %ymm1, %ymm0 - vpaddq %ymm0, %ymm1, %ymm1 - vmovd %xmm5, %eax - vmovd %xmm4, %edx - movl %eax, %ecx - shrl $26, %ecx - addl %edx, %ecx - movl %ecx, %edx - andl $67108863, %edx - vmovd %xmm3, %esi - shrl $26, %ecx - movl %ecx, %r11d - addl %esi, %r11d - vmovd %xmm2, %ecx - movl %r11d, %r10d - shrl $26, %r10d - addl %ecx, %r10d - movl %r10d, %r9d - andl $67108863, %r9d - vmovd %xmm1, %r8d - movl %edx, %esi - salq $26, %rsi - andl $67108863, %eax - orq %rax, %rsi - movabsq $17592186044415, %rax - andq %rax, %rsi - andl $67108863, %r11d - salq $8, %r11 - shrl $18, %edx - movl %edx, %edx - orq %r11, %rdx - movq %r9, %rcx - salq $34, %rcx - orq %rcx, %rdx - andq %rax, %rdx - shrl $26, %r10d - addl %r10d, %r8d - salq $16, %r8 - shrl $10, %r9d - 
movl %r9d, %r9d - orq %r9, %r8 - movabsq $4398046511103, %r10 - movq %r8, %r9 - andq %r10, %r9 - shrq $42, %r8 - leaq (%r8,%r8,4), %rcx - addq %rcx, %rsi - movq %rsi, %r8 - andq %rax, %r8 - movq %rsi, %rcx - shrq $44, %rcx - addq %rdx, %rcx - movq %rcx, %rsi - andq %rax, %rsi - shrq $44, %rcx - movq %rcx, %rdx - addq %r9, %rdx - andq %rdx, %r10 - shrq $42, %rdx - leaq (%r8,%rdx,4), %rcx - leaq (%rcx,%rdx), %rdx - movq %rdx, %rbx - andq %rax, %rbx - shrq $44, %rdx - movq %rdx, %r11 - addq %rsi, %r11 - leaq 5(%rbx), %r9 - movq %r9, %r8 - shrq $44, %r8 - addq %r11, %r8 - movabsq $-4398046511104, %rsi - addq %r10, %rsi - movq %r8, %rdx - shrq $44, %rdx - addq %rdx, %rsi - movq %rsi, %rdx - shrq $63, %rdx - subq $1, %rdx - movq %rdx, %rcx - notq %rcx - andq %rcx, %rbx - andq %rcx, %r11 - andq %r10, %rcx - andq %rax, %r9 - andq %rdx, %r9 - orq %r9, %rbx - movq %rbx, (%rdi) - andq %r8, %rax - andq %rdx, %rax - orq %rax, %r11 - movq %r11, 8(%rdi) - andq %rsi, %rdx - orq %rcx, %rdx - movq %rdx, 16(%rdi) -.Lpoly1305_blocks_avx2_8: - movq -8(%rbp), %rbx - vzeroall - movq %rbp, %rax - subq %rsp, %rax - leave - addq $8, %rax - ret -ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;) - - -.align 8 -.globl _gcry_poly1305_amd64_avx2_finish_ext -ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;) -_gcry_poly1305_amd64_avx2_finish_ext: -.Lpoly1305_finish_ext_avx2_local: - vzeroupper - pushq %rbp - movq %rsp, %rbp - pushq %r13 - pushq %r12 - pushq %rbx - andq $-64, %rsp - subq $64, %rsp - movq %rdi, %rbx - movq %rdx, %r13 - movq %rcx, %r12 - testq %rdx, %rdx - je .Lpoly1305_finish_ext_avx2_22 - vpxor %xmm0, %xmm0, %xmm0 - vmovdqa %ymm0, (%rsp) - vmovdqa %ymm0, 32(%rsp) - movq %rsp, %rax - subq %rsp, %rsi - testb $32, %dl - je .Lpoly1305_finish_ext_avx2_23 - vmovdqu (%rsp,%rsi), %ymm0 - vmovdqa %ymm0, (%rsp) - leaq 32(%rsp), %rax -.Lpoly1305_finish_ext_avx2_23: - testb $16, %r13b - je .Lpoly1305_finish_ext_avx2_24 - vmovdqu (%rax,%rsi), %xmm0 - 
vmovdqa %xmm0, (%rax) - addq $16, %rax -.Lpoly1305_finish_ext_avx2_24: - testb $8, %r13b - je .Lpoly1305_finish_ext_avx2_25 - movq (%rax,%rsi), %rdx - movq %rdx, (%rax) - addq $8, %rax -.Lpoly1305_finish_ext_avx2_25: - testb $4, %r13b - je .Lpoly1305_finish_ext_avx2_26 - movl (%rax,%rsi), %edx - movl %edx, (%rax) - addq $4, %rax -.Lpoly1305_finish_ext_avx2_26: - testb $2, %r13b - je .Lpoly1305_finish_ext_avx2_27 - movzwl (%rax,%rsi), %edx - movw %dx, (%rax) - addq $2, %rax -.Lpoly1305_finish_ext_avx2_27: - testb $1, %r13b - je .Lpoly1305_finish_ext_avx2_28 - movzbl (%rax,%rsi), %edx - movb %dl, (%rax) -.Lpoly1305_finish_ext_avx2_28: - testb $15, %r13b - je .Lpoly1305_finish_ext_avx2_29 - movb $1, (%rsp,%r13) -.Lpoly1305_finish_ext_avx2_29: - cmpq $47, %r13 - jbe .Lpoly1305_finish_ext_avx2_30 - orq $4, 320(%rbx) - jmp .Lpoly1305_finish_ext_avx2_31 -.Lpoly1305_finish_ext_avx2_30: - cmpq $31, %r13 - jbe .Lpoly1305_finish_ext_avx2_32 - orq $8, 320(%rbx) - jmp .Lpoly1305_finish_ext_avx2_31 -.Lpoly1305_finish_ext_avx2_32: - cmpq $15, %r13 - jbe .Lpoly1305_finish_ext_avx2_33 - orq $16, 320(%rbx) - jmp .Lpoly1305_finish_ext_avx2_31 -.Lpoly1305_finish_ext_avx2_33: - orq $32, 320(%rbx) -.Lpoly1305_finish_ext_avx2_31: - testb $1, 320(%rbx) - je .Lpoly1305_finish_ext_avx2_34 - cmpq $32, %r13 - ja .Lpoly1305_finish_ext_avx2_34 - cmpq $17, %r13 - sbbq %rsi, %rsi - notq %rsi - addq $2, %rsi - cmpq $17, %r13 - sbbq %rax, %rax - movq %rbx, %rdx - addq $23, %rax - leaq (%rbx,%rax,8), %rax - movl $0, %ecx -.Lpoly1305_finish_ext_avx2_37: - movl 244(%rdx), %edi - movl %edi, (%rax) - movl 252(%rdx), %edi - movl %edi, 32(%rax) - movl 260(%rdx), %edi - movl %edi, 64(%rax) - movl 268(%rdx), %edi - movl %edi, 96(%rax) - movl 276(%rdx), %edi - movl %edi, 128(%rax) - addq $1, %rcx - subq $40, %rdx - addq $8, %rax - cmpq %rcx, %rsi - ja .Lpoly1305_finish_ext_avx2_37 -.Lpoly1305_finish_ext_avx2_34: - movl $64, %edx - movq %rsp, %rsi - movq %rbx, %rdi - call .Lpoly1305_blocks_avx2_local 
-.Lpoly1305_finish_ext_avx2_22: - movq 320(%rbx), %r8 - testb $1, %r8b - je .Lpoly1305_finish_ext_avx2_38 - leaq -1(%r13), %rax - cmpq $47, %rax - ja .Lpoly1305_finish_ext_avx2_46 - cmpq $32, %r13 - ja .Lpoly1305_finish_ext_avx2_47 - cmpq $17, %r13 - sbbq %r9, %r9 - addq $2, %r9 - movl $0, %edi - cmpq $17, %r13 - sbbq %rax, %rax - notq %rax - andl $5, %eax - jmp .Lpoly1305_finish_ext_avx2_39 -.Lpoly1305_finish_ext_avx2_41: - movl (%rdx), %esi - movl %esi, (%rax) - movl 8(%rdx), %esi - movl %esi, 32(%rax) - movl 16(%rdx), %esi - movl %esi, 64(%rax) - movl 24(%rdx), %esi - movl %esi, 96(%rax) - movl 32(%rdx), %esi - movl %esi, 128(%rax) - addq $1, %rcx - subq $40, %rdx - addq $8, %rax - movq %rcx, %rsi - subq %rdi, %rsi - cmpq %rsi, %r9 - ja .Lpoly1305_finish_ext_avx2_41 - cmpq $3, %rcx - ja .Lpoly1305_finish_ext_avx2_42 - leaq 160(%rbx,%rcx,8), %rax -.Lpoly1305_finish_ext_avx2_43: - movl $1, (%rax) - movl $0, 32(%rax) - movl $0, 64(%rax) - movl $0, 96(%rax) - movl $0, 128(%rax) - addq $1, %rcx - addq $8, %rax - cmpq $4, %rcx - jne .Lpoly1305_finish_ext_avx2_43 -.Lpoly1305_finish_ext_avx2_42: - orq $96, %r8 - movq %r8, 320(%rbx) - vpxor %ymm0, %ymm0, %ymm0 - vmovdqa %ymm0, (%rsp) - vmovdqa %ymm0, 32(%rsp) - movl $64, %edx - movq %rsp, %rsi - movq %rbx, %rdi - call .Lpoly1305_blocks_avx2_local -.Lpoly1305_finish_ext_avx2_38: - movq 8(%rbx), %rax - movq %rax, %rdx - salq $44, %rdx - orq (%rbx), %rdx - shrq $20, %rax - movl $24, %edi - shlx %rdi, 16(%rbx), %rcx - orq %rcx, %rax - movl 292(%rbx), %ecx - salq $32, %rcx - movl 284(%rbx), %esi - orq %rsi, %rcx - movl 308(%rbx), %esi - salq $32, %rsi - movl 300(%rbx), %edi - orq %rdi, %rsi - addq %rcx, %rdx - adcq %rsi, %rax - movq %rdx, (%r12) - movq %rax, 8(%r12) - vpxor %xmm0, %xmm0, %xmm0 - vmovdqu %ymm0, (%rbx) - vmovdqu %ymm0, 32(%rbx) - vmovdqu %ymm0, 64(%rbx) - vmovdqu %ymm0, 96(%rbx) - vmovdqu %ymm0, 128(%rbx) - vmovdqu %ymm0, 160(%rbx) - vmovdqu %ymm0, 192(%rbx) - vmovdqu %ymm0, 224(%rbx) - jmp 
.Lpoly1305_finish_ext_avx2_49 -.Lpoly1305_finish_ext_avx2_46: - movl $3, %r9d - movl $1, %edi - movl $10, %eax - jmp .Lpoly1305_finish_ext_avx2_39 -.Lpoly1305_finish_ext_avx2_47: - movl $3, %r9d - movl $0, %edi - movl $10, %eax -.Lpoly1305_finish_ext_avx2_39: - leaq 164(%rbx,%rax,8), %rdx - leaq 160(%rbx,%rdi,8), %rax - movq %rdi, %rcx - jmp .Lpoly1305_finish_ext_avx2_41 -.Lpoly1305_finish_ext_avx2_49: - movq %rbp, %rax - subq %rsp, %rax - leaq -24(%rbp), %rsp - vzeroall - popq %rbx - popq %r12 - popq %r13 - popq %rbp - addq $(8*5), %rax -ret -ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;) - -#endif diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h index bcbe5df7..2405a090 100644 --- a/cipher/poly1305-internal.h +++ b/cipher/poly1305-internal.h @@ -1,167 +1,62 @@ /* poly1305-internal.h - Poly1305 internals * Copyright (C) 2014 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef G10_POLY1305_INTERNAL_H #define G10_POLY1305_INTERNAL_H #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" - #define POLY1305_TAGLEN 16 #define POLY1305_KEYLEN 32 +#define POLY1305_BLOCKSIZE 16 -/* Block-size used in default implementation. */ -#define POLY1305_REF_BLOCKSIZE 16 - -/* State size of default implementation. 
*/ -#define POLY1305_REF_STATESIZE 64 - -/* State alignment for default implementation. */ -#define POLY1305_REF_ALIGNMENT sizeof(void *) - - -#undef POLY1305_SYSV_FUNC_ABI - -/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ -#undef POLY1305_USE_SSE2 -#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ - defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) -# define POLY1305_USE_SSE2 1 -# define POLY1305_SSE2_BLOCKSIZE 32 -# define POLY1305_SSE2_STATESIZE 248 -# define POLY1305_SSE2_ALIGNMENT 16 -# define POLY1305_SYSV_FUNC_ABI 1 -#endif - - -/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ -#undef POLY1305_USE_AVX2 -#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ - defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ - defined(ENABLE_AVX2_SUPPORT) -# define POLY1305_USE_AVX2 1 -# define POLY1305_AVX2_BLOCKSIZE 64 -# define POLY1305_AVX2_STATESIZE 328 -# define POLY1305_AVX2_ALIGNMENT 32 -# define POLY1305_SYSV_FUNC_ABI 1 -#endif - - -/* POLY1305_USE_NEON indicates whether to enable ARM NEON assembly code. */ -#undef POLY1305_USE_NEON -#if defined(ENABLE_NEON_SUPPORT) && defined(HAVE_ARM_ARCH_V6) && \ - defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_NEON) -# define POLY1305_USE_NEON 1 -# define POLY1305_NEON_BLOCKSIZE 32 -# define POLY1305_NEON_STATESIZE 128 -# define POLY1305_NEON_ALIGNMENT 16 -#endif - - -/* Largest block-size used in any implementation (optimized implementations - * might use block-size multiple of 16). 
*/ -#ifdef POLY1305_USE_AVX2 -# define POLY1305_LARGEST_BLOCKSIZE POLY1305_AVX2_BLOCKSIZE -#elif defined(POLY1305_USE_NEON) -# define POLY1305_LARGEST_BLOCKSIZE POLY1305_NEON_BLOCKSIZE -#elif defined(POLY1305_USE_SSE2) -# define POLY1305_LARGEST_BLOCKSIZE POLY1305_SSE2_BLOCKSIZE -#else -# define POLY1305_LARGEST_BLOCKSIZE POLY1305_REF_BLOCKSIZE -#endif - -/* Largest state-size used in any implementation. */ -#ifdef POLY1305_USE_AVX2 -# define POLY1305_LARGEST_STATESIZE POLY1305_AVX2_STATESIZE -#elif defined(POLY1305_USE_NEON) -# define POLY1305_LARGEST_STATESIZE POLY1305_NEON_STATESIZE -#elif defined(POLY1305_USE_SSE2) -# define POLY1305_LARGEST_STATESIZE POLY1305_SSE2_STATESIZE -#else -# define POLY1305_LARGEST_STATESIZE POLY1305_REF_STATESIZE -#endif - -/* Minimum alignment for state pointer passed to implementations. */ -#ifdef POLY1305_USE_AVX2 -# define POLY1305_STATE_ALIGNMENT POLY1305_AVX2_ALIGNMENT -#elif defined(POLY1305_USE_NEON) -# define POLY1305_STATE_ALIGNMENT POLY1305_NEON_ALIGNMENT -#elif defined(POLY1305_USE_SSE2) -# define POLY1305_STATE_ALIGNMENT POLY1305_SSE2_ALIGNMENT -#else -# define POLY1305_STATE_ALIGNMENT POLY1305_REF_ALIGNMENT -#endif - - -/* Assembly implementations use SystemV ABI, ABI conversion and additional - * stack to store XMM6-XMM15 needed on Win64. 
*/ -#undef OPS_FUNC_ABI -#if defined(POLY1305_SYSV_FUNC_ABI) && \ - defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) -# define OPS_FUNC_ABI __attribute__((sysv_abi)) -#else -# define OPS_FUNC_ABI -#endif - - -typedef struct poly1305_key_s +typedef struct { - byte b[POLY1305_KEYLEN]; -} poly1305_key_t; - - -typedef struct poly1305_ops_s -{ - size_t block_size; - void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI; - unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI; - unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining, - byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI; -} poly1305_ops_t; - + u32 k[4]; + u32 r[4]; + u32 h[5]; +} POLY1305_STATE; typedef struct poly1305_context_s { - byte state[POLY1305_LARGEST_STATESIZE + POLY1305_STATE_ALIGNMENT]; - byte buffer[POLY1305_LARGEST_BLOCKSIZE]; - const poly1305_ops_t *ops; + POLY1305_STATE state; + byte buffer[POLY1305_BLOCKSIZE]; unsigned int leftover; } poly1305_context_t; -gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, +gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key, size_t keylen); -void _gcry_poly1305_finish (poly1305_context_t * ctx, - byte mac[POLY1305_TAGLEN]); +void _gcry_poly1305_finish (poly1305_context_t *ctx, + byte mac[POLY1305_TAGLEN]); -void _gcry_poly1305_update (poly1305_context_t * ctx, const byte * buf, - size_t buflen); +void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf, + size_t buflen); #endif /* G10_POLY1305_INTERNAL_H */ diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S deleted file mode 100644 index 219eb077..00000000 --- a/cipher/poly1305-sse2-amd64.S +++ /dev/null @@ -1,1043 +0,0 @@ -/* poly1305-sse2-amd64.S - AMD64/SSE2 implementation of Poly1305 - * - * Copyright (C) 2014 Jussi Kivilinna - * - * This file is part of Libgcrypt. 
- * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see . - */ - -/* - * Based on public domain implementation by Andrew Moon at - * https://github.com/floodyberry/poly1305-opt - */ - -#include - -#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ - defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) - -#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS -# define ELF(...) __VA_ARGS__ -#else -# define ELF(...) 
/*_*/ -#endif - - -.text - - -.align 8 -.globl _gcry_poly1305_amd64_sse2_init_ext -ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;) -_gcry_poly1305_amd64_sse2_init_ext: -.Lpoly1305_init_ext_x86_local: - xor %edx, %edx - pushq %r12 - pushq %r13 - pushq %r14 - movq %rdx, %r10 - movq $-1, %rcx - testq %r10, %r10 - pxor %xmm0, %xmm0 - movq $0xfffffc0ffff, %r9 - movdqa %xmm0, (%rdi) - cmove %rcx, %r10 - movdqa %xmm0, 16(%rdi) - movq $0xffc0fffffff, %rcx - movdqa %xmm0, 32(%rdi) - movdqa %xmm0, 48(%rdi) - movdqa %xmm0, 64(%rdi) - movq 8(%rsi), %r11 - movq %r11, %r8 - movq (%rsi), %r12 - andq %r12, %rcx - shrq $44, %r12 - shlq $20, %r8 - shrq $24, %r11 - orq %r8, %r12 - movq $0xffffffc0f, %r8 - andq %r9, %r12 - andq %r8, %r11 - movl %ecx, %r8d - andl $67108863, %r8d - movq %rcx, %r9 - movl %r8d, 84(%rdi) - movq %r12, %r8 - shrq $26, %r9 - shlq $18, %r8 - orq %r8, %r9 - movq %r12, %r8 - shrq $8, %r8 - andl $67108863, %r9d - andl $67108863, %r8d - movl %r9d, 92(%rdi) - movq %r12, %r9 - movl %r8d, 100(%rdi) - movq %r11, %r8 - shrq $34, %r9 - shlq $10, %r8 - orq %r8, %r9 - movq %r11, %r8 - shrq $16, %r8 - andl $67108863, %r9d - movl %r9d, 108(%rdi) - cmpq $16, %r10 - movl %r8d, 116(%rdi) - movl 16(%rsi), %r8d - movl %r8d, 124(%rdi) - movl 20(%rsi), %r8d - movl %r8d, 132(%rdi) - movl 24(%rsi), %r8d - movl %r8d, 140(%rdi) - movl 28(%rsi), %esi - movl %esi, 148(%rdi) - jbe .Lpoly1305_init_ext_sse2_done - lea (%r11,%r11,4), %r14 - shlq $2, %r14 - lea (%r12,%r12), %rax - mulq %r14 - movq %rax, %r13 - movq %rcx, %rax - movq %rdx, %r8 - mulq %rcx - addq %rax, %r13 - lea (%rcx,%rcx), %rax - movq %r13, %r9 - adcq %rdx, %r8 - mulq %r12 - shlq $20, %r8 - movq %rax, %rsi - shrq $44, %r9 - movq %r11, %rax - orq %r9, %r8 - movq %rdx, %r9 - mulq %r14 - addq %rax, %rsi - movq %rcx, %rax - adcq %rdx, %r9 - addq %r11, %r11 - mulq %r11 - addq %rsi, %r8 - movq %rax, %r11 - movq %r12, %rax - movq %rdx, %rcx - adcq $0, %r9 - mulq %r12 - addq %rax, %r11 - movq %r8, %rsi - adcq %rdx, %rcx - 
shlq $20, %r9 - shrq $44, %rsi - orq %rsi, %r9 - movq $0xfffffffffff, %rsi - addq %r11, %r9 - movq %r9, %r12 - adcq $0, %rcx - andq %rsi, %r13 - shlq $22, %rcx - andq %rsi, %r8 - shrq $42, %r12 - orq %r12, %rcx - movq %rsi, %r12 - lea (%rcx,%rcx,4), %rcx - addq %rcx, %r13 - movq %rsi, %rcx - andq %r13, %rcx - shrq $44, %r13 - movq %rcx, %r14 - addq %r13, %r8 - movq $0x3ffffffffff, %r13 - andq %r8, %r12 - andq %r13, %r9 - shrq $44, %r8 - movq %r12, %r11 - addq %r8, %r9 - movq %r12, %rax - movq %r9, %r13 - movl %ecx, %r8d - shrq $26, %r14 - andl $67108863, %r8d - shlq $18, %r11 - shrq $34, %rax - orq %r11, %r14 - shlq $10, %r13 - movq %r12, %r11 - orq %r13, %rax - movq %r9, %r13 - shrq $8, %r11 - shrq $16, %r13 - andl $67108863, %r14d - andl $67108863, %r11d - andl $67108863, %eax - movl %r8d, 88(%rdi) - cmpq $64, %r10 - movl %r8d, 80(%rdi) - movl %r14d, 104(%rdi) - movl %r14d, 96(%rdi) - movl %r11d, 120(%rdi) - movl %r11d, 112(%rdi) - movl %eax, 136(%rdi) - movl %eax, 128(%rdi) - movl %r13d, 152(%rdi) - movl %r13d, 144(%rdi) - jbe .Lpoly1305_init_ext_sse2_done - lea (%r9,%r9,4), %r14 - shlq $2, %r14 - lea (%r12,%r12), %rax - mulq %r14 - movq %rax, %r8 - movq %rcx, %rax - movq %rdx, %r10 - mulq %rcx - addq %rax, %r8 - lea (%rcx,%rcx), %rax - movq %r8, %r11 - adcq %rdx, %r10 - andq %rsi, %r8 - mulq %r12 - shlq $20, %r10 - movq %rax, %r13 - shrq $44, %r11 - movq %r9, %rax - orq %r11, %r10 - movq %rdx, %r11 - mulq %r14 - addq %rax, %r13 - movq %rcx, %rax - adcq %rdx, %r11 - addq %r9, %r9 - mulq %r9 - addq %r13, %r10 - movq %rax, %r9 - movq %r12, %rax - movq %rdx, %rcx - adcq $0, %r11 - mulq %r12 - addq %rax, %r9 - movq %r10, %r13 - adcq %rdx, %rcx - andq %rsi, %r10 - shlq $20, %r11 - shrq $44, %r13 - orq %r13, %r11 - addq %r9, %r11 - movq %rsi, %r9 - movq %r11, %r12 - adcq $0, %rcx - shlq $22, %rcx - shrq $42, %r12 - orq %r12, %rcx - lea (%rcx,%rcx,4), %rcx - addq %rcx, %r8 - andq %r8, %r9 - shrq $44, %r8 - movl %r9d, %eax - addq %r8, %r10 - movq $0x3ffffffffff, %r8 - 
andq %r10, %rsi - andq %r8, %r11 - shrq $44, %r10 - movq %rsi, %r8 - addq %r10, %r11 - andl $67108863, %eax - shrq $26, %r9 - movq %r11, %r10 - shlq $18, %r8 - shlq $10, %r10 - orq %r8, %r9 - movq %rsi, %r8 - shrq $34, %rsi - andl $67108863, %r9d - shrq $8, %r8 - orq %r10, %rsi - shrq $16, %r11 - andl $67108863, %r8d - andl $67108863, %esi - movl %eax, 168(%rdi) - movl %eax, 160(%rdi) - movl %r9d, 184(%rdi) - movl %r9d, 176(%rdi) - movl %r8d, 200(%rdi) - movl %r8d, 192(%rdi) - movl %esi, 216(%rdi) - movl %esi, 208(%rdi) - movl %r11d, 232(%rdi) - movl %r11d, 224(%rdi) -.Lpoly1305_init_ext_sse2_done: - movq $0, 240(%rdi) - popq %r14 - popq %r13 - popq %r12 - ret -ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;) - - -.align 8 -.globl _gcry_poly1305_amd64_sse2_finish_ext -ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;) -_gcry_poly1305_amd64_sse2_finish_ext: -.Lpoly1305_finish_ext_x86_local: - pushq %rbp - movq %rsp, %rbp - subq $64, %rsp - andq $~63, %rsp - movq %rdx, 32(%rsp) - movq %rcx, 40(%rsp) - andq %rdx, %rdx - jz .Lpoly1305_finish_x86_no_leftover - pxor %xmm0, %xmm0 - movdqa %xmm0, 0+0(%rsp) - movdqa %xmm0, 16+0(%rsp) - leaq 0(%rsp), %r8 - testq $16, %rdx - jz .Lpoly1305_finish_x86_skip16 - movdqu 0(%rsi), %xmm0 - movdqa %xmm0, 0(%r8) - addq $16, %rsi - addq $16, %r8 -.Lpoly1305_finish_x86_skip16: - testq $8, %rdx - jz .Lpoly1305_finish_x86_skip8 - movq 0(%rsi), %rax - movq %rax, 0(%r8) - addq $8, %rsi - addq $8, %r8 -.Lpoly1305_finish_x86_skip8: - testq $4, %rdx - jz .Lpoly1305_finish_x86_skip4 - movl 0(%rsi), %eax - movl %eax, 0(%r8) - addq $4, %rsi - addq $4, %r8 -.Lpoly1305_finish_x86_skip4: - testq $2, %rdx - jz .Lpoly1305_finish_x86_skip2 - movw 0(%rsi), %ax - movw %ax, 0(%r8) - addq $2, %rsi - addq $2, %r8 -.Lpoly1305_finish_x86_skip2: - testq $1, %rdx - jz .Lpoly1305_finish_x86_skip1 - movb 0(%rsi), %al - movb %al, 0(%r8) - addq $1, %r8 -.Lpoly1305_finish_x86_skip1: - cmpq $16, %rdx - je 
.Lpoly1305_finish_x86_is16 - movb $1, 0(%r8) -.Lpoly1305_finish_x86_is16: - movq $4, %rax - jae .Lpoly1305_finish_x86_16andover - movq $8, %rax -.Lpoly1305_finish_x86_16andover: - orq %rax, 240(%rdi) - leaq 0(%rsp), %rsi - movq $32, %rdx - callq .Lpoly1305_blocks_x86_local -.Lpoly1305_finish_x86_no_leftover: - testq $1, 240(%rdi) - jz .Lpoly1305_finish_x86_not_started - movq 32(%rsp), %rdx - andq %rdx, %rdx - jz .Lpoly1305_finish_x86_r2r - cmpq $16, %rdx - jg .Lpoly1305_finish_x86_r2r - xorl %r10d, %r10d - movl 84(%rdi), %eax - movl 92(%rdi), %ecx - movl 100(%rdi), %edx - movl 108(%rdi), %r8d - movl 116(%rdi), %r9d - movl %eax, 80(%rdi) - movl $1, 8+80(%rdi) - movl %ecx, 96(%rdi) - movl %r10d, 8+96(%rdi) - movl %edx, 112(%rdi) - movl %r10d, 8+112(%rdi) - movl %r8d, 128(%rdi) - movl %r10d, 8+128(%rdi) - movl %r9d, 144(%rdi) - movl %r10d, 8+144(%rdi) - jmp .Lpoly1305_finish_x86_combine -.Lpoly1305_finish_x86_r2r: - movl 84(%rdi), %eax - movl 92(%rdi), %ecx - movl 100(%rdi), %edx - movl 108(%rdi), %r8d - movl 116(%rdi), %r9d - movl %eax, 8+80(%rdi) - movl %ecx, 8+96(%rdi) - movl %edx, 8+112(%rdi) - movl %r8d, 8+128(%rdi) - movl %r9d, 8+144(%rdi) -.Lpoly1305_finish_x86_combine: - xorq %rsi, %rsi - movq $32, %rdx - callq .Lpoly1305_blocks_x86_local -.Lpoly1305_finish_x86_not_started: - movq 0(%rdi), %r8 - movq 8(%rdi), %r9 - movq %r9, %r10 - movq 16(%rdi), %r11 - shlq $44, %r9 - shrq $20, %r10 - shlq $24, %r11 - orq %r9, %r8 - orq %r11, %r10 - pxor %xmm0, %xmm0 - movl 124(%rdi), %eax - movl 132(%rdi), %ecx - movl 140(%rdi), %edx - movl 148(%rdi), %esi - movq 40(%rsp), %r11 - shlq $32, %rcx - shlq $32, %rsi - orq %rcx, %rax - orq %rsi, %rdx - addq %r8, %rax - adcq %r10, %rdx - movq %rax, 0(%r11) - movq %rdx, 8(%r11) - movq %rbp, %rax - subq %rsp, %rax - movq %rbp, %rsp - movdqa %xmm0, 0(%rdi) - movdqa %xmm0, 16(%rdi) - movdqa %xmm0, 32(%rdi) - movdqa %xmm0, 48(%rdi) - movdqa %xmm0, 64(%rdi) - movdqa %xmm0, 80(%rdi) - movdqa %xmm0, 96(%rdi) - movdqa %xmm0, 112(%rdi) - 
movdqa %xmm0, 128(%rdi) - movdqa %xmm0, 144(%rdi) - movdqa %xmm0, 160(%rdi) - movdqa %xmm0, 176(%rdi) - movdqa %xmm0, 192(%rdi) - movdqa %xmm0, 208(%rdi) - movdqa %xmm0, 224(%rdi) - popq %rbp - addq $8, %rax - ret -ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;) - - -.align 8 -.globl _gcry_poly1305_amd64_sse2_blocks -ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;) -_gcry_poly1305_amd64_sse2_blocks: -.Lpoly1305_blocks_x86_local: - pushq %rbp - movq %rsp, %rbp - pushq %rbx - andq $-64, %rsp - subq $328, %rsp - movq 240(%rdi), %rax - movl $(1<<24), %r8d - movl $((1<<26)-1), %r9d - movd %r8, %xmm0 - movd %r9, %xmm5 - pshufd $0x44, %xmm0, %xmm0 - pshufd $0x44, %xmm5, %xmm5 - testb $4, %al - je .Lpoly1305_blocks_x86_3 - psrldq $8, %xmm0 -.Lpoly1305_blocks_x86_3: - testb $8, %al - je .Lpoly1305_blocks_x86_4 - pxor %xmm0, %xmm0 -.Lpoly1305_blocks_x86_4: - movdqa %xmm0, 168(%rsp) - testb $1, %al - jne .Lpoly1305_blocks_x86_5 - movq 16(%rsi), %xmm0 - movdqa %xmm5, %xmm7 - movdqa %xmm5, %xmm10 - movq (%rsi), %xmm6 - orq $1, %rax - subq $32, %rdx - movq 8(%rsi), %xmm1 - punpcklqdq %xmm0, %xmm6 - movq 24(%rsi), %xmm0 - pand %xmm6, %xmm7 - movdqa %xmm6, %xmm9 - psrlq $52, %xmm6 - addq $32, %rsi - punpcklqdq %xmm0, %xmm1 - movdqa %xmm1, %xmm0 - psrlq $26, %xmm9 - psllq $12, %xmm0 - movq %rax, 240(%rdi) - pand %xmm5, %xmm9 - por %xmm0, %xmm6 - psrlq $40, %xmm1 - pand %xmm6, %xmm10 - por 168(%rsp), %xmm1 - psrlq $26, %xmm6 - pand %xmm5, %xmm6 -.Lpoly1305_blocks_x86_6: - movdqa 80(%rdi), %xmm13 - cmpq $63, %rdx - movl $(5), %r8d - movd %r8, %xmm14 - pshufd $0x44, %xmm14, %xmm14 - movdqa 96(%rdi), %xmm15 - movdqa %xmm13, -8(%rsp) - movdqa 112(%rdi), %xmm0 - movdqa %xmm14, 136(%rsp) - movdqa 128(%rdi), %xmm3 - movdqa %xmm15, 312(%rsp) - pmuludq %xmm14, %xmm15 - movdqa 144(%rdi), %xmm13 - movdqa %xmm0, 232(%rsp) - pmuludq %xmm14, %xmm0 - movdqa %xmm3, 152(%rsp) - pmuludq %xmm14, %xmm3 - movdqa %xmm13, 56(%rsp) - pmuludq %xmm14, %xmm13 - 
movdqa %xmm15, 40(%rsp) - movdqa %xmm0, -24(%rsp) - movdqa %xmm3, -40(%rsp) - movdqa %xmm13, -56(%rsp) - jbe .Lpoly1305_blocks_x86_7 - movdqa 192(%rdi), %xmm15 - leaq 32(%rsi), %rax - movq %rdx, %rcx - movdqa 176(%rdi), %xmm14 - movdqa %xmm15, %xmm2 - movdqa 208(%rdi), %xmm0 - movdqa %xmm15, 216(%rsp) - movdqa %xmm14, 296(%rsp) - movdqa 224(%rdi), %xmm3 - pmuludq 136(%rsp), %xmm14 - movdqa -24(%rsp), %xmm13 - movdqa %xmm14, 8(%rsp) - pmuludq 136(%rsp), %xmm2 - movdqa -40(%rsp), %xmm14 - movdqa %xmm0, 120(%rsp) - pmuludq 136(%rsp), %xmm0 - movdqa %xmm3, 24(%rsp) - movdqa 160(%rdi), %xmm12 - movdqa %xmm0, %xmm8 - movdqa -56(%rsp), %xmm15 - movdqa %xmm13, 88(%rsp) - pmuludq 136(%rsp), %xmm3 - movdqa %xmm2, 104(%rsp) - movdqa %xmm0, %xmm13 - movdqa -8(%rsp), %xmm11 - movdqa %xmm3, 280(%rsp) - movdqa %xmm2, %xmm3 - movdqa %xmm0, 200(%rsp) - movdqa %xmm14, 184(%rsp) - movdqa %xmm15, 264(%rsp) - jmp .Lpoly1305_blocks_x86_8 -.p2align 6,,63 -.Lpoly1305_blocks_x86_13: - movdqa 200(%rsp), %xmm13 - movdqa %xmm3, %xmm6 - movdqa 200(%rsp), %xmm8 - movdqa 104(%rsp), %xmm3 -.Lpoly1305_blocks_x86_8: - movdqa 8(%rsp), %xmm4 - pmuludq %xmm6, %xmm3 - subq $64, %rcx - pmuludq %xmm10, %xmm8 - movdqa 104(%rsp), %xmm2 - movdqa 200(%rsp), %xmm0 - pmuludq %xmm1, %xmm4 - movdqa 280(%rsp), %xmm15 - pmuludq %xmm6, %xmm13 - movdqa 280(%rsp), %xmm14 - pmuludq %xmm1, %xmm0 - paddq %xmm3, %xmm4 - pmuludq %xmm1, %xmm2 - movdqa 280(%rsp), %xmm3 - paddq %xmm8, %xmm4 - pmuludq %xmm9, %xmm15 - movdqa 280(%rsp), %xmm8 - pmuludq %xmm10, %xmm14 - pmuludq %xmm6, %xmm8 - paddq %xmm13, %xmm2 - movdqa %xmm6, %xmm13 - pmuludq %xmm1, %xmm3 - paddq %xmm15, %xmm4 - movdqa 296(%rsp), %xmm15 - pmuludq %xmm12, %xmm13 - paddq %xmm14, %xmm2 - movdqa %xmm7, %xmm14 - paddq %xmm8, %xmm0 - pmuludq %xmm12, %xmm14 - movdqa %xmm9, %xmm8 - pmuludq 296(%rsp), %xmm6 - pmuludq %xmm12, %xmm8 - movdqa %xmm6, 248(%rsp) - pmuludq %xmm10, %xmm15 - movq -16(%rax), %xmm6 - paddq %xmm13, %xmm3 - movdqa %xmm10, %xmm13 - paddq %xmm14, 
%xmm4 - movq -8(%rax), %xmm14 - paddq %xmm8, %xmm2 - movq -32(%rax), %xmm8 - pmuludq %xmm12, %xmm13 - paddq %xmm15, %xmm3 - pmuludq %xmm12, %xmm1 - movdqa 216(%rsp), %xmm15 - pmuludq 216(%rsp), %xmm10 - punpcklqdq %xmm6, %xmm8 - movq -24(%rax), %xmm6 - pmuludq %xmm9, %xmm15 - paddq %xmm13, %xmm0 - movdqa 296(%rsp), %xmm13 - paddq 248(%rsp), %xmm1 - punpcklqdq %xmm14, %xmm6 - movdqa 296(%rsp), %xmm14 - pmuludq %xmm9, %xmm13 - pmuludq 120(%rsp), %xmm9 - movdqa %xmm15, 72(%rsp) - paddq %xmm10, %xmm1 - movdqa 216(%rsp), %xmm15 - pmuludq %xmm7, %xmm14 - movdqa %xmm6, %xmm10 - paddq %xmm9, %xmm1 - pmuludq %xmm7, %xmm15 - paddq %xmm13, %xmm0 - paddq 72(%rsp), %xmm3 - movdqa 120(%rsp), %xmm13 - psllq $12, %xmm10 - paddq %xmm14, %xmm2 - movdqa %xmm5, %xmm14 - pand %xmm8, %xmm14 - pmuludq %xmm7, %xmm13 - paddq %xmm15, %xmm0 - movdqa %xmm14, 248(%rsp) - movdqa %xmm8, %xmm14 - psrlq $52, %xmm8 - movdqu (%rax), %xmm9 - por %xmm10, %xmm8 - pmuludq 24(%rsp), %xmm7 - movdqu 16(%rax), %xmm10 - paddq %xmm13, %xmm3 - pxor %xmm13, %xmm13 - movdqa %xmm9, %xmm15 - paddq %xmm7, %xmm1 - movdqa %xmm6, %xmm7 - movdqa %xmm10, -72(%rsp) - punpckldq %xmm10, %xmm15 - movdqa %xmm15, %xmm10 - punpckldq %xmm13, %xmm10 - punpckhdq -72(%rsp), %xmm9 - psrlq $40, %xmm6 - movdqa %xmm10, 72(%rsp) - movdqa %xmm9, %xmm10 - punpckhdq %xmm13, %xmm9 - psllq $18, %xmm9 - paddq 72(%rsp), %xmm4 - addq $64, %rax - paddq %xmm9, %xmm3 - movdqa 40(%rsp), %xmm9 - cmpq $63, %rcx - punpckhdq %xmm13, %xmm15 - psllq $6, %xmm15 - punpckldq %xmm13, %xmm10 - paddq %xmm15, %xmm2 - psllq $12, %xmm10 - por 168(%rsp), %xmm6 - pmuludq %xmm6, %xmm9 - movdqa 88(%rsp), %xmm15 - paddq %xmm10, %xmm0 - movdqa 88(%rsp), %xmm13 - psrlq $14, %xmm7 - pand %xmm5, %xmm8 - movdqa 184(%rsp), %xmm10 - pand %xmm5, %xmm7 - pmuludq %xmm7, %xmm15 - paddq %xmm9, %xmm4 - pmuludq %xmm6, %xmm13 - movdqa 184(%rsp), %xmm9 - paddq 168(%rsp), %xmm1 - pmuludq %xmm7, %xmm10 - pmuludq %xmm6, %xmm9 - paddq %xmm15, %xmm4 - movdqa 184(%rsp), %xmm15 - paddq 
%xmm13, %xmm2 - psrlq $26, %xmm14 - movdqa 264(%rsp), %xmm13 - paddq %xmm10, %xmm2 - pmuludq %xmm8, %xmm15 - pand %xmm5, %xmm14 - paddq %xmm9, %xmm0 - pmuludq %xmm6, %xmm13 - movdqa 264(%rsp), %xmm9 - movdqa 264(%rsp), %xmm10 - pmuludq %xmm11, %xmm6 - pmuludq %xmm8, %xmm9 - paddq %xmm15, %xmm4 - movdqa 264(%rsp), %xmm15 - pmuludq %xmm14, %xmm10 - paddq %xmm13, %xmm3 - movdqa %xmm7, %xmm13 - pmuludq %xmm7, %xmm15 - paddq %xmm6, %xmm1 - movdqa 312(%rsp), %xmm6 - paddq %xmm9, %xmm2 - pmuludq %xmm11, %xmm13 - movdqa 248(%rsp), %xmm9 - paddq %xmm10, %xmm4 - pmuludq %xmm8, %xmm6 - pmuludq 312(%rsp), %xmm7 - paddq %xmm15, %xmm0 - movdqa %xmm9, %xmm10 - movdqa %xmm14, %xmm15 - pmuludq %xmm11, %xmm10 - paddq %xmm13, %xmm3 - movdqa %xmm8, %xmm13 - pmuludq %xmm11, %xmm13 - paddq %xmm6, %xmm3 - paddq %xmm7, %xmm1 - movdqa 232(%rsp), %xmm6 - pmuludq %xmm11, %xmm15 - pmuludq 232(%rsp), %xmm8 - paddq %xmm10, %xmm4 - paddq %xmm8, %xmm1 - movdqa 312(%rsp), %xmm10 - paddq %xmm13, %xmm0 - pmuludq %xmm14, %xmm6 - movdqa 312(%rsp), %xmm13 - pmuludq %xmm9, %xmm10 - paddq %xmm15, %xmm2 - movdqa 232(%rsp), %xmm7 - pmuludq %xmm14, %xmm13 - pmuludq 152(%rsp), %xmm14 - paddq %xmm14, %xmm1 - pmuludq %xmm9, %xmm7 - paddq %xmm6, %xmm3 - paddq %xmm10, %xmm2 - movdqa 152(%rsp), %xmm10 - paddq %xmm13, %xmm0 - pmuludq %xmm9, %xmm10 - paddq %xmm7, %xmm0 - movdqa %xmm4, %xmm7 - psrlq $26, %xmm7 - pmuludq 56(%rsp), %xmm9 - pand %xmm5, %xmm4 - paddq %xmm7, %xmm2 - paddq %xmm9, %xmm1 - paddq %xmm10, %xmm3 - movdqa %xmm2, %xmm7 - movdqa %xmm2, %xmm9 - movdqa %xmm3, %xmm6 - psrlq $26, %xmm7 - pand %xmm5, %xmm3 - psrlq $26, %xmm6 - paddq %xmm7, %xmm0 - pand %xmm5, %xmm9 - paddq %xmm6, %xmm1 - movdqa %xmm0, %xmm10 - movdqa %xmm1, %xmm6 - pand %xmm5, %xmm10 - pand %xmm5, %xmm1 - psrlq $26, %xmm6 - pmuludq 136(%rsp), %xmm6 - paddq %xmm6, %xmm4 - movdqa %xmm0, %xmm6 - psrlq $26, %xmm6 - movdqa %xmm4, %xmm2 - movdqa %xmm4, %xmm7 - paddq %xmm6, %xmm3 - psrlq $26, %xmm2 - pand %xmm5, %xmm7 - movdqa %xmm3, %xmm0 - 
paddq %xmm2, %xmm9 - pand %xmm5, %xmm3 - psrlq $26, %xmm0 - paddq %xmm0, %xmm1 - ja .Lpoly1305_blocks_x86_13 - leaq -64(%rdx), %rax - movdqa %xmm3, %xmm6 - andl $63, %edx - andq $-64, %rax - leaq 64(%rsi,%rax), %rsi -.Lpoly1305_blocks_x86_7: - cmpq $31, %rdx - jbe .Lpoly1305_blocks_x86_9 - movdqa -24(%rsp), %xmm13 - movdqa %xmm6, %xmm0 - movdqa %xmm6, %xmm3 - movdqa 40(%rsp), %xmm11 - movdqa %xmm1, %xmm12 - testq %rsi, %rsi - movdqa -40(%rsp), %xmm2 - pmuludq %xmm13, %xmm0 - movdqa %xmm1, %xmm8 - pmuludq %xmm1, %xmm11 - movdqa %xmm10, %xmm4 - movdqa %xmm1, %xmm14 - pmuludq %xmm2, %xmm3 - movdqa %xmm6, %xmm15 - pmuludq %xmm1, %xmm13 - movdqa %xmm7, %xmm1 - pmuludq %xmm2, %xmm12 - paddq %xmm0, %xmm11 - movdqa -56(%rsp), %xmm0 - pmuludq %xmm10, %xmm2 - paddq %xmm3, %xmm13 - pmuludq %xmm0, %xmm4 - movdqa %xmm9, %xmm3 - pmuludq %xmm0, %xmm3 - paddq %xmm2, %xmm11 - pmuludq %xmm0, %xmm8 - movdqa %xmm6, %xmm2 - pmuludq %xmm0, %xmm2 - movdqa -8(%rsp), %xmm0 - paddq %xmm4, %xmm13 - movdqa 312(%rsp), %xmm4 - paddq %xmm3, %xmm11 - pmuludq 312(%rsp), %xmm6 - movdqa 312(%rsp), %xmm3 - pmuludq %xmm0, %xmm1 - paddq %xmm2, %xmm12 - pmuludq %xmm0, %xmm15 - movdqa %xmm9, %xmm2 - pmuludq %xmm0, %xmm2 - pmuludq %xmm7, %xmm3 - paddq %xmm1, %xmm11 - movdqa 232(%rsp), %xmm1 - pmuludq %xmm0, %xmm14 - paddq %xmm15, %xmm8 - pmuludq %xmm10, %xmm0 - paddq %xmm2, %xmm13 - movdqa 312(%rsp), %xmm2 - pmuludq %xmm10, %xmm4 - paddq %xmm3, %xmm13 - movdqa 152(%rsp), %xmm3 - pmuludq %xmm9, %xmm2 - paddq %xmm6, %xmm14 - pmuludq 232(%rsp), %xmm10 - paddq %xmm0, %xmm12 - pmuludq %xmm9, %xmm1 - paddq %xmm10, %xmm14 - movdqa 232(%rsp), %xmm0 - pmuludq %xmm7, %xmm3 - paddq %xmm4, %xmm8 - pmuludq 152(%rsp), %xmm9 - paddq %xmm2, %xmm12 - paddq %xmm9, %xmm14 - pmuludq %xmm7, %xmm0 - paddq %xmm1, %xmm8 - pmuludq 56(%rsp), %xmm7 - paddq %xmm3, %xmm8 - paddq %xmm7, %xmm14 - paddq %xmm0, %xmm12 - je .Lpoly1305_blocks_x86_10 - movdqu (%rsi), %xmm1 - pxor %xmm0, %xmm0 - paddq 168(%rsp), %xmm14 - movdqu 16(%rsi), 
%xmm2 - movdqa %xmm1, %xmm3 - punpckldq %xmm2, %xmm3 - punpckhdq %xmm2, %xmm1 - movdqa %xmm3, %xmm4 - movdqa %xmm1, %xmm2 - punpckldq %xmm0, %xmm4 - punpckhdq %xmm0, %xmm3 - punpckhdq %xmm0, %xmm1 - punpckldq %xmm0, %xmm2 - movdqa %xmm2, %xmm0 - psllq $6, %xmm3 - paddq %xmm4, %xmm11 - psllq $12, %xmm0 - paddq %xmm3, %xmm13 - psllq $18, %xmm1 - paddq %xmm0, %xmm12 - paddq %xmm1, %xmm8 -.Lpoly1305_blocks_x86_10: - movdqa %xmm11, %xmm9 - movdqa %xmm8, %xmm1 - movdqa %xmm11, %xmm7 - psrlq $26, %xmm9 - movdqa %xmm8, %xmm6 - pand %xmm5, %xmm7 - paddq %xmm13, %xmm9 - psrlq $26, %xmm1 - pand %xmm5, %xmm6 - movdqa %xmm9, %xmm10 - paddq %xmm14, %xmm1 - pand %xmm5, %xmm9 - psrlq $26, %xmm10 - movdqa %xmm1, %xmm0 - pand %xmm5, %xmm1 - paddq %xmm12, %xmm10 - psrlq $26, %xmm0 - pmuludq 136(%rsp), %xmm0 - movdqa %xmm10, %xmm2 - paddq %xmm0, %xmm7 - psrlq $26, %xmm2 - movdqa %xmm7, %xmm0 - pand %xmm5, %xmm10 - paddq %xmm2, %xmm6 - psrlq $26, %xmm0 - pand %xmm5, %xmm7 - movdqa %xmm6, %xmm2 - paddq %xmm0, %xmm9 - pand %xmm5, %xmm6 - psrlq $26, %xmm2 - paddq %xmm2, %xmm1 -.Lpoly1305_blocks_x86_9: - testq %rsi, %rsi - je .Lpoly1305_blocks_x86_11 - movdqa %xmm7, 0(%rdi) - movdqa %xmm9, 16(%rdi) - movdqa %xmm10, 32(%rdi) - movdqa %xmm6, 48(%rdi) - movdqa %xmm1, 64(%rdi) - movq -8(%rbp), %rbx - leave - ret -.Lpoly1305_blocks_x86_5: - movdqa 0(%rdi), %xmm7 - movdqa 16(%rdi), %xmm9 - movdqa 32(%rdi), %xmm10 - movdqa 48(%rdi), %xmm6 - movdqa 64(%rdi), %xmm1 - jmp .Lpoly1305_blocks_x86_6 -.Lpoly1305_blocks_x86_11: - movdqa %xmm7, %xmm0 - movdqa %xmm9, %xmm2 - movdqa %xmm6, %xmm3 - psrldq $8, %xmm0 - movabsq $4398046511103, %rbx - paddq %xmm0, %xmm7 - psrldq $8, %xmm2 - movdqa %xmm10, %xmm0 - movd %xmm7, %edx - paddq %xmm2, %xmm9 - psrldq $8, %xmm0 - movl %edx, %ecx - movd %xmm9, %eax - paddq %xmm0, %xmm10 - shrl $26, %ecx - psrldq $8, %xmm3 - movdqa %xmm1, %xmm0 - addl %ecx, %eax - movd %xmm10, %ecx - paddq %xmm3, %xmm6 - movl %eax, %r9d - shrl $26, %eax - psrldq $8, %xmm0 - addl %ecx, %eax 
- movd %xmm6, %ecx - paddq %xmm0, %xmm1 - movl %eax, %esi - andl $67108863, %r9d - movd %xmm1, %r10d - shrl $26, %esi - andl $67108863, %eax - andl $67108863, %edx - addl %ecx, %esi - salq $8, %rax - movl %r9d, %ecx - shrl $18, %r9d - movl %esi, %r8d - shrl $26, %esi - andl $67108863, %r8d - addl %r10d, %esi - orq %r9, %rax - salq $16, %rsi - movq %r8, %r9 - shrl $10, %r8d - salq $26, %rcx - orq %r8, %rsi - salq $34, %r9 - orq %rdx, %rcx - movq %rsi, %r8 - shrq $42, %rsi - movabsq $17592186044415, %rdx - orq %r9, %rax - andq %rbx, %r8 - leaq (%rsi,%rsi,4), %rsi - andq %rdx, %rcx - andq %rdx, %rax - movabsq $-4398046511104, %r10 - addq %rsi, %rcx - movq %rcx, %rsi - shrq $44, %rcx - addq %rcx, %rax - andq %rdx, %rsi - movq %rax, %rcx - shrq $44, %rax - addq %r8, %rax - andq %rdx, %rcx - andq %rax, %rbx - shrq $42, %rax - leaq (%rsi,%rax,4), %rsi - addq %rbx, %r10 - addq %rax, %rsi - movq %rsi, %r8 - shrq $44, %rsi - andq %rdx, %r8 - addq %rcx, %rsi - leaq 5(%r8), %r9 - movq %r9, %r11 - andq %rdx, %r9 - shrq $44, %r11 - addq %rsi, %r11 - movq %r11, %rax - andq %r11, %rdx - shrq $44, %rax - addq %rax, %r10 - movq %r10, %rax - shrq $63, %rax - subq $1, %rax - movq %rax, %rcx - andq %rax, %r9 - andq %rax, %rdx - notq %rcx - andq %r10, %rax - andq %rcx, %r8 - andq %rcx, %rsi - andq %rbx, %rcx - orq %r9, %r8 - orq %rdx, %rsi - orq %rax, %rcx - movq %r8, 0(%rdi) - movq %rsi, 8(%rdi) - movq %rcx, 16(%rdi) - movq -8(%rbp), %rbx - movq %rbp, %rax - subq %rsp, %rax - pxor %xmm15, %xmm15 - pxor %xmm7, %xmm7 - pxor %xmm14, %xmm14 - pxor %xmm6, %xmm6 - pxor %xmm13, %xmm13 - pxor %xmm5, %xmm5 - pxor %xmm12, %xmm12 - pxor %xmm4, %xmm4 - leave - addq $8, %rax - pxor %xmm11, %xmm11 - pxor %xmm3, %xmm3 - pxor %xmm10, %xmm10 - pxor %xmm2, %xmm2 - pxor %xmm9, %xmm9 - pxor %xmm1, %xmm1 - pxor %xmm8, %xmm8 - pxor %xmm0, %xmm0 - ret -ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;) - -#endif diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 
22255fb1..68d9b901 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -1,643 +1,667 @@ /* poly1305.c - Poly1305 internals and generic implementation - * Copyright (C) 2014 Jussi Kivilinna + * Copyright (C) 2014,2017,2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ -/* The code is based on public-domain Poly1305 implementation by - * Andrew Moon at - * https://github.com/floodyberry/poly1305-opt - */ - #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "poly1305-internal.h" +#include "mpi-internal.h" +#include "longlong.h" + static const char *selftest (void); - - - -#ifdef POLY1305_USE_SSE2 - -void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key) - OPS_FUNC_ABI; -unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m, - size_t remaining, - byte mac[16]) OPS_FUNC_ABI; -unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m, - size_t bytes) OPS_FUNC_ABI; - -static const poly1305_ops_t poly1305_amd64_sse2_ops = { - POLY1305_SSE2_BLOCKSIZE, - _gcry_poly1305_amd64_sse2_init_ext, - _gcry_poly1305_amd64_sse2_blocks, - _gcry_poly1305_amd64_sse2_finish_ext -}; - -#else /* !POLY1305_USE_SSE2 */ - -static OPS_FUNC_ABI void poly1305_init_ext_ref32 -/**/ (void *state, const poly1305_key_t *key); -static OPS_FUNC_ABI 
unsigned int poly1305_blocks_ref32 -/**/ (void *state, const byte *m, size_t bytes); -static OPS_FUNC_ABI unsigned int poly1305_finish_ext_ref32 -/**/ (void *state, const byte * m, - size_t remaining, byte mac[POLY1305_TAGLEN]); - -static const poly1305_ops_t poly1305_default_ops = { - POLY1305_REF_BLOCKSIZE, - poly1305_init_ext_ref32, - poly1305_blocks_ref32, - poly1305_finish_ext_ref32 -}; - -#endif /* !POLY1305_USE_SSE2 */ - - -#ifdef POLY1305_USE_AVX2 - -void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key) - OPS_FUNC_ABI; -unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m, - size_t remaining, - byte mac[16]) OPS_FUNC_ABI; -unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m, - size_t bytes) OPS_FUNC_ABI; - -static const poly1305_ops_t poly1305_amd64_avx2_ops = { - POLY1305_AVX2_BLOCKSIZE, - _gcry_poly1305_amd64_avx2_init_ext, - _gcry_poly1305_amd64_avx2_blocks, - _gcry_poly1305_amd64_avx2_finish_ext -}; + +#undef USE_MPI_64BIT +#undef USE_MPI_32BIT +#if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_U64_TYPEDEF) +# define USE_MPI_64BIT 1 +#elif BYTES_PER_MPI_LIMB == 4 +# define USE_MPI_32BIT 1 +#else +# error please implement for this limb size. 
#endif -#ifdef POLY1305_USE_NEON +static void poly1305_init (poly1305_context_t *ctx, + const byte key[POLY1305_KEYLEN]) +{ + POLY1305_STATE *st = &ctx->state; -void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key) - OPS_FUNC_ABI; -unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m, - size_t remaining, - byte mac[16]) OPS_FUNC_ABI; -unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m, - size_t bytes) OPS_FUNC_ABI; + ctx->leftover = 0; -static const poly1305_ops_t poly1305_armv7_neon_ops = { - POLY1305_NEON_BLOCKSIZE, - _gcry_poly1305_armv7_neon_init_ext, - _gcry_poly1305_armv7_neon_blocks, - _gcry_poly1305_armv7_neon_finish_ext -}; + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; -#endif + st->r[0] = buf_get_le32(key + 0) & 0x0fffffff; + st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc; + st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc; + st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc; + st->k[0] = buf_get_le32(key + 16); + st->k[1] = buf_get_le32(key + 20); + st->k[2] = buf_get_le32(key + 24); + st->k[3] = buf_get_le32(key + 28); +} -/* Reference unoptimized poly1305 implementation using 32 bit * 32 bit = 64 bit - * multiplication and 64 bit addition. 
- */ -typedef struct poly1305_state_ref32_s +#ifdef USE_MPI_64BIT + +#if defined (__aarch64__) && __GNUC__ >= 4 + +/* A += B (armv8/aarch64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("adds %0, %3, %0\n" \ + "adcs %1, %4, %1\n" \ + "adc %2, %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "r" (B0), "r" (B1), "r" (B2) \ + : "cc" ) + +#endif /* __aarch64__ */ + +#if defined (__x86_64__) && __GNUC__ >= 4 + +/* A += B (x86-64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("addq %3, %0\n" \ + "adcq %4, %1\n" \ + "adcq %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "g" (B0), "g" (B1), "g" (B2) \ + : "cc" ) + +#endif /* __x86_64__ */ + +#ifndef ADD_1305_64 +/* A += B (generic, mpi) */ +# define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \ + u64 carry; \ + add_ssaaaa(carry, A0, 0, A0, 0, B0); \ + add_ssaaaa(A2, A1, A2, A1, B2, B1); \ + add_ssaaaa(A2, A1, A2, A1, 0, carry); \ + } while (0) +#endif + +/* H = H * R mod 2¹³⁰-5 */ +#define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \ + u64 x0_lo, x0_hi, x1_lo, x1_hi; \ + u64 t0_lo, t0_hi, t1_lo, t1_hi; \ + \ + /* x = a * r (partial mod 2^130-5) */ \ + umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ + umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ + \ + umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ + add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ + umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ + add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ + t1_hi = H2 * R0; /* h2 * r0 */ \ + add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + /* carry propagation */ \ + H2 = H0 & 3; \ + H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ + ADD_1305_64(H2, H1, H0, 0, x0_hi, x0_lo); \ + } while (0) + +unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) { - u32 r[5]; - u32 h[5]; - u32 pad[4]; - byte final; -} poly1305_state_ref32_t; + POLY1305_STATE 
*st = &ctx->state; + u64 r0, r1, r1_mult5; + u64 h0, h1, h2; + u64 m0, m1, m2; + + m2 = high_pad; + + h0 = st->h[0] + ((u64)st->h[1] << 32); + h1 = st->h[2] + ((u64)st->h[3] << 32); + h2 = st->h[4]; + + r0 = st->r[0] + ((u64)st->r[1] << 32); + r1 = st->r[2] + ((u64)st->r[3] << 32); + + r1_mult5 = (r1 >> 2) + r1; + + m0 = buf_get_le64(buf + 0); + m1 = buf_get_le64(buf + 8); + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; + + while (len >= POLY1305_BLOCKSIZE) + { + /* a = h + m */ + ADD_1305_64(h2, h1, h0, m2, m1, m0); + + m0 = buf_get_le64(buf + 0); + m1 = buf_get_le64(buf + 8); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); + + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; + } + + /* a = h + m */ + ADD_1305_64(h2, h1, h0, m2, m1, m0); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); + st->h[0] = h0; + st->h[1] = h0 >> 32; + st->h[2] = h1; + st->h[3] = h1 >> 32; + st->h[4] = h2; + + return 6 * sizeof (void *) + 18 * sizeof (u64); +} -#ifndef POLY1305_USE_SSE2 -static OPS_FUNC_ABI void -poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key) +static unsigned int poly1305_final (poly1305_context_t *ctx, + byte mac[POLY1305_TAGLEN]) { - poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; + POLY1305_STATE *st = &ctx->state; + unsigned int burn = 0; + u64 u, carry; + u64 k0, k1; + u64 h0, h1; + u64 h2; + + /* process the remaining block */ + if (ctx->leftover) + { + ctx->buffer[ctx->leftover++] = 1; + for (; ctx->leftover < POLY1305_BLOCKSIZE; ctx->leftover++) + ctx->buffer[ctx->leftover] = 0; + burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); + } - gcry_assert (sizeof (*st) + POLY1305_STATE_ALIGNMENT <= - sizeof (((poly1305_context_t *) 0)->state)); + h0 = st->h[0] + ((u64)st->h[1] << 32); + h1 = st->h[2] + ((u64)st->h[3] << 32); + h2 = st->h[4]; - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - st->r[0] = (buf_get_le32 
(&key->b[0])) & 0x3ffffff; - st->r[1] = (buf_get_le32 (&key->b[3]) >> 2) & 0x3ffff03; - st->r[2] = (buf_get_le32 (&key->b[6]) >> 4) & 0x3ffc0ff; - st->r[3] = (buf_get_le32 (&key->b[9]) >> 6) & 0x3f03fff; - st->r[4] = (buf_get_le32 (&key->b[12]) >> 8) & 0x00fffff; + k0 = st->k[0] + ((u64)st->k[1] << 32); + k1 = st->k[2] + ((u64)st->k[3] << 32); - /* h = 0 */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->h[3] = 0; - st->h[4] = 0; + /* check if h is more than 2^130-5, by adding 5. */ + add_ssaaaa(carry, u, 0, h0, 0, 5); + add_ssaaaa(carry, u, 0, carry, 0, h1); + u = (carry + h2) >> 2; /* u == 0 or 1 */ - /* save pad for later */ - st->pad[0] = buf_get_le32 (&key->b[16]); - st->pad[1] = buf_get_le32 (&key->b[20]); - st->pad[2] = buf_get_le32 (&key->b[24]); - st->pad[3] = buf_get_le32 (&key->b[28]); + /* minus 2^130-5 ... (+5) */ + u = (-u) & 5; + add_ssaaaa(h1, h0, h1, h0, 0, u); - st->final = 0; + /* add high part of key + h */ + add_ssaaaa(h1, h0, h1, h0, k1, k0); + buf_put_le64(mac + 0, h0); + buf_put_le64(mac + 8, h1); + + /* burn_stack */ + return 4 * sizeof (void *) + 7 * sizeof (u64) + burn; } -#endif /* !POLY1305_USE_SSE2 */ +#endif /* USE_MPI_64BIT */ + +#ifdef USE_MPI_32BIT + +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +/* HI:LO += A * B (arm) */ +#define UMUL_ADD_32(HI, LO, A, B) \ + __asm__ ("umlal %1, %0, %4, %5" \ + : "=r" (HI), "=r" (LO) \ + : "0" (HI), "1" (LO), "r" (A), "r" (B) ) + +/* A += B (arm) */ +#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ + __asm__ ("adds %0, %0, %5\n" \ + "adcs %1, %1, %6\n" \ + "adcs %2, %2, %7\n" \ + "adcs %3, %3, %8\n" \ + "adc %4, %4, %9\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ + : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \ + : "cc" ) + +#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ + +#if defined (__i386__) && __GNUC__ >= 4 + +/* A += B (i386) */ +#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ + __asm__ ("addl %5, %0\n" \ + "adcl %6, %1\n" \ + 
"adcl %7, %2\n" \ + "adcl %8, %3\n" \ + "adcl %9, %4\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ + : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \ + : "cc" ) + +#endif /* __i386__ */ + +#ifndef UMUL_ADD_32 +/* HI:LO += A * B (generic, mpi) */ +# define UMUL_ADD_32(HI, LO, A, B) do { \ + u32 t_lo, t_hi; \ + umul_ppmm(t_hi, t_lo, A, B); \ + add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \ + } while (0) +#endif + +#ifndef ADD_1305_32 +/* A += B (generic, mpi) */ +# define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ + u32 carry0, carry1, carry2; \ + add_ssaaaa(carry0, A0, 0, A0, 0, B0); \ + add_ssaaaa(carry1, A1, 0, A1, 0, B1); \ + add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \ + add_ssaaaa(carry2, A2, 0, A2, 0, B2); \ + add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \ + add_ssaaaa(A4, A3, A4, A3, B4, B3); \ + add_ssaaaa(A4, A3, A4, A3, 0, carry2); \ + } while (0) +#endif -#ifndef POLY1305_USE_SSE2 -static OPS_FUNC_ABI unsigned int -poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes) +/* H = H * R mod 2¹³⁰-5 */ +#define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \ + R3_MULT5, R2_MULT5, R1_MULT5) do { \ + u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \ + u32 t0_lo, t0_hi; \ + \ + /* x = a * r (partial mod 2^130-5) */ \ + umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ + umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ + umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \ + umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \ + \ + UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \ + \ + UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \ + UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \ + 
\ + UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \ + H1 = x0_hi; \ + UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \ + \ + t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \ + t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \ + add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \ + add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \ + t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \ + t0_hi = H4 * R0; /* h4 * r0 */ \ + add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \ + \ + /* carry propagation */ \ + H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \ + H4 = H4 & 3; \ + ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \ + } while (0) + +unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) { - poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; - const u32 hibit = (st->final) ? 
0 : (1 << 24); /* 1 << 128 */ - u32 r0, r1, r2, r3, r4; - u32 s1, s2, s3, s4; + POLY1305_STATE *st = &ctx->state; + u32 r1_mult5, r2_mult5, r3_mult5; u32 h0, h1, h2, h3, h4; - u64 d0, d1, d2, d3, d4; - u32 c; - - r0 = st->r[0]; - r1 = st->r[1]; - r2 = st->r[2]; - r3 = st->r[3]; - r4 = st->r[4]; + u32 m0, m1, m2, m3, m4; - s1 = r1 * 5; - s2 = r2 * 5; - s3 = r3 * 5; - s4 = r4 * 5; + m4 = high_pad; h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; - while (bytes >= POLY1305_REF_BLOCKSIZE) + r1_mult5 = (st->r[1] >> 2) + st->r[1]; + r2_mult5 = (st->r[2] >> 2) + st->r[2]; + r3_mult5 = (st->r[3] >> 2) + st->r[3]; + + while (len >= POLY1305_BLOCKSIZE) { - /* h += m[i] */ - h0 += (buf_get_le32 (m + 0)) & 0x3ffffff; - h1 += (buf_get_le32 (m + 3) >> 2) & 0x3ffffff; - h2 += (buf_get_le32 (m + 6) >> 4) & 0x3ffffff; - h3 += (buf_get_le32 (m + 9) >> 6) & 0x3ffffff; - h4 += (buf_get_le32 (m + 12) >> 8) | hibit; - - /* h *= r */ - d0 = - ((u64) h0 * r0) + ((u64) h1 * s4) + - ((u64) h2 * s3) + ((u64) h3 * s2) + ((u64) h4 * s1); - d1 = - ((u64) h0 * r1) + ((u64) h1 * r0) + - ((u64) h2 * s4) + ((u64) h3 * s3) + ((u64) h4 * s2); - d2 = - ((u64) h0 * r2) + ((u64) h1 * r1) + - ((u64) h2 * r0) + ((u64) h3 * s4) + ((u64) h4 * s3); - d3 = - ((u64) h0 * r3) + ((u64) h1 * r2) + - ((u64) h2 * r1) + ((u64) h3 * r0) + ((u64) h4 * s4); - d4 = - ((u64) h0 * r4) + ((u64) h1 * r3) + - ((u64) h2 * r2) + ((u64) h3 * r1) + ((u64) h4 * r0); - - /* (partial) h %= p */ - c = (u32) (d0 >> 26); - h0 = (u32) d0 & 0x3ffffff; - d1 += c; - c = (u32) (d1 >> 26); - h1 = (u32) d1 & 0x3ffffff; - d2 += c; - c = (u32) (d2 >> 26); - h2 = (u32) d2 & 0x3ffffff; - d3 += c; - c = (u32) (d3 >> 26); - h3 = (u32) d3 & 0x3ffffff; - d4 += c; - c = (u32) (d4 >> 26); - h4 = (u32) d4 & 0x3ffffff; - h0 += c * 5; - c = (h0 >> 26); - h0 = h0 & 0x3ffffff; - h1 += c; - - m += POLY1305_REF_BLOCKSIZE; - bytes -= POLY1305_REF_BLOCKSIZE; + m0 = buf_get_le32(buf + 0); + m1 = buf_get_le32(buf + 4); + m2 = 
buf_get_le32(buf + 8); + m3 = buf_get_le32(buf + 12); + + /* a = h + m */ + ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_32(h4, h3, h2, h1, h0, + st->r[3], st->r[2], st->r[1], st->r[0], + r3_mult5, r2_mult5, r1_mult5); + + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; } st->h[0] = h0; st->h[1] = h1; st->h[2] = h2; st->h[3] = h3; st->h[4] = h4; - return (16 * sizeof (u32) + 5 * sizeof (u64) + 5 * sizeof (void *)); + return 6 * sizeof (void *) + 28 * sizeof (u32); } -#endif /* !POLY1305_USE_SSE2 */ - -#ifndef POLY1305_USE_SSE2 -static OPS_FUNC_ABI unsigned int -poly1305_finish_ext_ref32 (void *state, const byte * m, - size_t remaining, byte mac[POLY1305_TAGLEN]) +static unsigned int poly1305_final (poly1305_context_t *ctx, + byte mac[POLY1305_TAGLEN]) { - poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; - u32 h0, h1, h2, h3, h4, c; - u32 g0, g1, g2, g3, g4; - u64 f; - u32 mask; + POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; + u32 carry, tmp0, tmp1, tmp2, u; + u32 h4, h3, h2, h1, h0; /* process the remaining block */ - if (remaining) + if (ctx->leftover) { - byte final[POLY1305_REF_BLOCKSIZE] = { 0 }; - size_t i; - for (i = 0; i < remaining; i++) - final[i] = m[i]; - final[remaining] = 1; - st->final = 1; - burn = poly1305_blocks_ref32 (st, final, POLY1305_REF_BLOCKSIZE); + ctx->buffer[ctx->leftover++] = 1; + for (; ctx->leftover < POLY1305_BLOCKSIZE; ctx->leftover++) + ctx->buffer[ctx->leftover] = 0; + burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } - /* fully carry h */ h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; - c = h1 >> 26; - h1 = h1 & 0x3ffffff; - h2 += c; - c = h2 >> 26; - h2 = h2 & 0x3ffffff; - h3 += c; - c = h3 >> 26; - h3 = h3 & 0x3ffffff; - h4 += c; - c = h4 >> 26; - h4 = h4 & 0x3ffffff; - h0 += c * 5; - c = h0 >> 26; - h0 = h0 & 0x3ffffff; - h1 += c; - - /* compute h + -p */ - g0 = h0 + 5; - c = g0 >> 26; 
- g0 &= 0x3ffffff; - g1 = h1 + c; - c = g1 >> 26; - g1 &= 0x3ffffff; - g2 = h2 + c; - c = g2 >> 26; - g2 &= 0x3ffffff; - g3 = h3 + c; - c = g3 >> 26; - g3 &= 0x3ffffff; - g4 = h4 + c - (1 << 26); - - /* select h if h < p, or h + -p if h >= p */ - mask = (g4 >> ((sizeof (u32) * 8) - 1)) - 1; - g0 &= mask; - g1 &= mask; - g2 &= mask; - g3 &= mask; - g4 &= mask; - mask = ~mask; - h0 = (h0 & mask) | g0; - h1 = (h1 & mask) | g1; - h2 = (h2 & mask) | g2; - h3 = (h3 & mask) | g3; - h4 = (h4 & mask) | g4; - - /* h = h % (2^128) */ - h0 = ((h0) | (h1 << 26)) & 0xffffffff; - h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; - h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; - h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; - - /* mac = (h + pad) % (2^128) */ - f = (u64) h0 + st->pad[0]; - h0 = (u32) f; - f = (u64) h1 + st->pad[1] + (f >> 32); - h1 = (u32) f; - f = (u64) h2 + st->pad[2] + (f >> 32); - h2 = (u32) f; - f = (u64) h3 + st->pad[3] + (f >> 32); - h3 = (u32) f; - - buf_put_le32 (mac + 0, h0); - buf_put_le32 (mac + 4, h1); - buf_put_le32 (mac + 8, h2); - buf_put_le32 (mac + 12, h3); - - /* zero out the state */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->h[3] = 0; - st->h[4] = 0; - st->r[0] = 0; - st->r[1] = 0; - st->r[2] = 0; - st->r[3] = 0; - st->r[4] = 0; - st->pad[0] = 0; - st->pad[1] = 0; - st->pad[2] = 0; - st->pad[3] = 0; + /* check if h is more than 2^130-5, by adding 5. */ + add_ssaaaa(carry, tmp0, 0, h0, 0, 5); + add_ssaaaa(carry, tmp0, 0, carry, 0, h1); + add_ssaaaa(carry, tmp0, 0, carry, 0, h2); + add_ssaaaa(carry, tmp0, 0, carry, 0, h3); + u = (carry + h4) >> 2; /* u == 0 or 1 */ + + /* minus 2^130-5 ... 
(+5) */ + u = (-u) & 5; + add_ssaaaa(carry, h0, 0, h0, 0, u); + add_ssaaaa(carry, h1, 0, h1, 0, carry); + add_ssaaaa(carry, h2, 0, h2, 0, carry); + add_ssaaaa(carry, h3, 0, h3, 0, carry); + + /* add high part of key + h */ + add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]); + add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]); + add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0); + add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]); + add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1); + add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]); + h3 += tmp2; + + buf_put_le32(mac + 0, h0); + buf_put_le32(mac + 4, h1); + buf_put_le32(mac + 8, h2); + buf_put_le32(mac + 12, h3); /* burn_stack */ - return (13 * sizeof (u32) + sizeof (u64) + - POLY1305_REF_BLOCKSIZE + 6 * sizeof (void *)) + burn; + return 4 * sizeof (void *) + 10 * sizeof (u32) + burn; } -#endif /* !POLY1305_USE_SSE2*/ - - - - -static inline void * -poly1305_get_state (poly1305_context_t * ctx) -{ - byte *c = ctx->state; - c += POLY1305_STATE_ALIGNMENT - 1; - c -= (uintptr_t) c & (POLY1305_STATE_ALIGNMENT - 1); - return c; -} - - -static void -poly1305_init (poly1305_context_t * ctx, const poly1305_key_t * key) -{ - void *state = poly1305_get_state (ctx); - - ctx->leftover = 0; - - ctx->ops->init_ext (state, key); -} +#endif /* USE_MPI_32BIT */ void -_gcry_poly1305_update (poly1305_context_t * ctx, const byte * m, size_t bytes) +_gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes) { - void *state = poly1305_get_state (ctx); unsigned int burn = 0; - size_t block_size = ctx->ops->block_size; /* handle leftover */ if (ctx->leftover) { - size_t want = (block_size - ctx->leftover); + size_t want = (POLY1305_BLOCKSIZE - ctx->leftover); if (want > bytes) want = bytes; buf_cpy (ctx->buffer + ctx->leftover, m, want); bytes -= want; m += want; ctx->leftover += want; - if (ctx->leftover < block_size) + if (ctx->leftover < POLY1305_BLOCKSIZE) return; - burn = ctx->ops->blocks (state, ctx->buffer, block_size); + burn = poly1305_blocks (ctx, ctx->buffer, 
POLY1305_BLOCKSIZE, 1); ctx->leftover = 0; } /* process full blocks */ - if (bytes >= block_size) + if (bytes >= POLY1305_BLOCKSIZE) { - size_t want = (bytes & ~(block_size - 1)); - burn = ctx->ops->blocks (state, m, want); - m += want; - bytes -= want; + size_t nblks = bytes / POLY1305_BLOCKSIZE; + burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); + m += nblks * POLY1305_BLOCKSIZE; + bytes -= nblks * POLY1305_BLOCKSIZE; } /* store leftover */ if (bytes) { buf_cpy (ctx->buffer + ctx->leftover, m, bytes); ctx->leftover += bytes; } if (burn) _gcry_burn_stack (burn); } void -_gcry_poly1305_finish (poly1305_context_t * ctx, byte mac[POLY1305_TAGLEN]) +_gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { - void *state = poly1305_get_state (ctx); unsigned int burn; - burn = ctx->ops->finish_ext (state, ctx->buffer, ctx->leftover, mac); + burn = poly1305_final (ctx, mac); _gcry_burn_stack (burn); } gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, size_t keylen) { static int initialized; static const char *selftest_failed; - poly1305_key_t keytmp; - unsigned int features = _gcry_get_hw_features (); if (!initialized) { initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); } if (keylen != POLY1305_KEYLEN) return GPG_ERR_INV_KEYLEN; if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; -#ifdef POLY1305_USE_SSE2 - ctx->ops = &poly1305_amd64_sse2_ops; -#else - ctx->ops = &poly1305_default_ops; -#endif - -#ifdef POLY1305_USE_AVX2 - if (features & HWF_INTEL_AVX2) - ctx->ops = &poly1305_amd64_avx2_ops; -#endif -#ifdef POLY1305_USE_NEON - if (features & HWF_ARM_NEON) - ctx->ops = &poly1305_armv7_neon_ops; -#endif - (void)features; - - buf_cpy (keytmp.b, key, POLY1305_KEYLEN); - poly1305_init (ctx, &keytmp); - - wipememory (&keytmp, sizeof (keytmp)); + poly1305_init (ctx, key); return 0; } static void poly1305_auth (byte 
mac[POLY1305_TAGLEN], const byte * m, size_t bytes, const byte * key) { poly1305_context_t ctx; memset (&ctx, 0, sizeof (ctx)); _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, m, bytes); _gcry_poly1305_finish (&ctx, mac); wipememory (&ctx, sizeof (ctx)); } static const char * selftest (void) { /* example from nacl */ static const byte nacl_key[POLY1305_KEYLEN] = { 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, }; static const byte nacl_msg[131] = { 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, 0xe3, 0x55, 0xa5 }; static const byte nacl_mac[16] = { 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 }; /* generates a final value of (2^130 - 2) == 3 */ static const byte wrap_key[POLY1305_KEYLEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const byte wrap_msg[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte 
wrap_mac[16] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; /* mac of the macs of messages of length 0 to 256, where the key and messages * have all their values set to the length */ static const byte total_key[POLY1305_KEYLEN] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte total_mac[16] = { 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 }; poly1305_context_t ctx; poly1305_context_t total_ctx; byte all_key[POLY1305_KEYLEN]; byte all_msg[256]; byte mac[16]; size_t i, j; memset (&ctx, 0, sizeof (ctx)); memset (&total_ctx, 0, sizeof (total_ctx)); memset (mac, 0, sizeof (mac)); poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 1 failed."; /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so * make sure everything still works varying between them */ memset (mac, 0, sizeof (mac)); _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); _gcry_poly1305_update (&ctx, nacl_msg + 120, 4); _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); _gcry_poly1305_update (&ctx, nacl_msg + 128, 1); _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); _gcry_poly1305_finish (&ctx, mac); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 2 failed."; memset (mac, 0, sizeof (mac)); poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) 
return "Poly1305 test 3 failed."; _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); for (i = 0; i < 256; i++) { /* set key and message to 'i,i,i..' */ for (j = 0; j < sizeof (all_key); j++) all_key[j] = i; for (j = 0; j < i; j++) all_msg[j] = i; poly1305_auth (mac, all_msg, i, all_key); _gcry_poly1305_update (&total_ctx, mac, 16); } _gcry_poly1305_finish (&total_ctx, mac); if (memcmp (total_mac, mac, sizeof (total_mac)) != 0) return "Poly1305 test 4 failed."; return NULL; } diff --git a/configure.ac b/configure.ac index 57b840e6..c4b59f4d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,2665 +1,2652 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2017 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ(2.60) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. 
m4_define(mym4_version_major, [1]) m4_define(mym4_version_minor, [9]) m4_define(mym4_version_micro, [0]) # Below is m4 magic to extract and compute the revision number, the # decimalized short revision number, a beta version string, and a flag # indicating a development version (mym4_isgit). Note that the m4 # processing is done by autoconf and not during the configure run. m4_define(mym4_version, [mym4_version_major.mym4_version_minor.mym4_version_micro]) m4_define([mym4_revision], m4_esyscmd([git rev-parse --short HEAD | tr -d '\n\r'])) m4_define([mym4_revision_dec], m4_esyscmd_s([echo $((0x$(echo ]mym4_revision[|head -c 4)))])) m4_define([mym4_betastring], m4_esyscmd_s([git describe --match 'libgcrypt-[0-9].*[0-9]' --long|\ awk -F- '$3!=0{print"-beta"$3}'])) m4_define([mym4_isgit],m4_if(mym4_betastring,[],[no],[yes])) m4_define([mym4_full_version],[mym4_version[]mym4_betastring]) AC_INIT([libgcrypt],[mym4_full_version],[http://bugs.gnupg.org]) # LT Version numbers, remember to change them just *before* a release. # (Interfaces removed: CURRENT++, AGE=0, REVISION=0) # (Interfaces added: CURRENT++, AGE++, REVISION=0) # (No interfaces changed: REVISION++) LIBGCRYPT_LT_CURRENT=23 LIBGCRYPT_LT_AGE=3 LIBGCRYPT_LT_REVISION=0 # If the API is changed in an incompatible way: increment the next counter. # # 1.6: ABI and API change but the change is to most users irrelevant # and thus the API version number has not been incremented. LIBGCRYPT_CONFIG_API_VERSION=1 # If you change the required gpg-error version, please remove # unnecessary error code defines in src/gcrypt-int.h. 
NEED_GPG_ERROR_VERSION=1.25 PACKAGE=$PACKAGE_NAME VERSION=$PACKAGE_VERSION AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADER(config.h) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. */ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) AC_SUBST(LIBGCRYPT_LT_CURRENT) AC_SUBST(LIBGCRYPT_LT_AGE) AC_SUBST(LIBGCRYPT_LT_REVISION) AC_SUBST(PACKAGE) AC_SUBST(VERSION) AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of this package]) AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version of this package]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_version_major \ mym4_version_minor mym4_version_micro) AC_SUBST(VERSION_NUMBER) ###################### ## Basic checks. ### (we need some results later on (e.g. 
$GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_ISC_POSIX AC_PROG_INSTALL AC_PROG_AWK AC_GNU_SOURCE # We need to compile and run a program on the build machine. A # comment in libgpg-error says that the AC_PROG_CC_FOR_BUILD macro in # the AC archive is broken for autoconf 2.57. Given that there is no # newer version of that macro, we assume that it is also broken for # autoconf 2.61 and thus we use a simple but usually sufficient # approach. AC_MSG_CHECKING(for cc for build) if test "$cross_compiling" = "yes"; then CC_FOR_BUILD="${CC_FOR_BUILD-cc}" else CC_FOR_BUILD="${CC_FOR_BUILD-$CC}" fi AC_MSG_RESULT($CC_FOR_BUILD) AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler]) LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. 
available_random_modules="linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 900000L, Expose all libc features (__DARWIN_C_FULL).) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. 
case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AC_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. ***]]) fi # If not specified otherwise, all available algorithms will be # included. 
default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AC_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. AC_ARG_ENABLE(pubkey-ciphers, AC_HELP_STRING([--enable-pubkey-ciphers=ciphers], [select the public-key ciphers to include]), [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_pubkey_ciphers=""]) if test "x$enabled_pubkey_ciphers" = "x" \ -o "$enabled_pubkey_ciphers" = "yes" \ -o "$enabled_pubkey_ciphers" = "no"; then enabled_pubkey_ciphers=$default_pubkey_ciphers fi AC_MSG_CHECKING([which public-key ciphers to include]) for cipher in $enabled_pubkey_ciphers; do LIST_MEMBER($cipher, $available_pubkey_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported public-key cipher specified]) fi done AC_MSG_RESULT([$enabled_pubkey_ciphers]) # Implementation of the --enable-digests switch. 
AC_ARG_ENABLE(digests,
              AC_HELP_STRING([--enable-digests=digests],
                             [select the message digests to include]),
              [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
              [enabled_digests=""])
if test "x$enabled_digests" = "x" \
   -o "$enabled_digests" = "yes" \
   -o "$enabled_digests" = "no"; then
   enabled_digests=$default_digests
fi
AC_MSG_CHECKING([which message digests to include])
for digest in $enabled_digests; do
    LIST_MEMBER($digest, $available_digests)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported message digest specified])
    fi
done
AC_MSG_RESULT([$enabled_digests])

# Implementation of the --enable-kdfs switch.
# (Typo fix: the help string used to advertise "--enable-kfds"; the
#  actual option name, as declared to AC_ARG_ENABLE, is --enable-kdfs.)
AC_ARG_ENABLE(kdfs,
              AC_HELP_STRING([--enable-kdfs=kdfs],
                             [select the KDFs to include]),
              [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
              [enabled_kdfs=""])
if test "x$enabled_kdfs" = "x" \
   -o "$enabled_kdfs" = "yes" \
   -o "$enabled_kdfs" = "no"; then
   enabled_kdfs=$default_kdfs
fi
AC_MSG_CHECKING([which key derivation functions to include])
for kdf in $enabled_kdfs; do
    LIST_MEMBER($kdf, $available_kdfs)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported key derivation function specified])
    fi
done
AC_MSG_RESULT([$enabled_kdfs])

# Implementation of the --enable-random switch.
AC_ARG_ENABLE(random,
              AC_HELP_STRING([--enable-random=name],
                             [select which random number generator to use]),
              [random=`echo $enableval | tr '[A-Z]' '[a-z]'`],
              [])
if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then
    random=default
fi
AC_MSG_CHECKING([which random module to use])
if test "$random" != "default" -a "$random" != "auto"; then
    LIST_MEMBER($random, $available_random_modules)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported random module specified])
    fi
fi
AC_MSG_RESULT($random)

# Implementation of the --disable-dev-random switch.
AC_MSG_CHECKING([whether use of /dev/random is requested]) AC_ARG_ENABLE(dev-random, [ --disable-dev-random disable the use of dev random], try_dev_random=$enableval, try_dev_random=yes) AC_MSG_RESULT($try_dev_random) # Implementation of the --with-egd-socket switch. AC_ARG_WITH(egd-socket, [ --with-egd-socket=NAME Use NAME for the EGD socket)], egd_socket_name="$withval", egd_socket_name="" ) AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name", [Define if you don't want the default EGD socket name. For details see cipher/rndegd.c]) # Implementation of the --enable-random-daemon AC_MSG_CHECKING([whether the experimental random daemon is requested]) AC_ARG_ENABLE([random-daemon], AC_HELP_STRING([--enable-random-daemon], [Build and support the experimental gcryptrnd]), [use_random_daemon=$enableval], [use_random_daemon=no]) AC_MSG_RESULT($use_random_daemon) if test x$use_random_daemon = xyes ; then AC_DEFINE(USE_RANDOM_DAEMON,1, [Define to support the experimental random daemon]) fi AM_CONDITIONAL(USE_RANDOM_DAEMON, test x$use_random_daemon = xyes) # Implementation of --disable-asm. AC_MSG_CHECKING([whether MPI assembler modules are requested]) AC_ARG_ENABLE([asm], AC_HELP_STRING([--disable-asm], [Disable MPI assembler modules]), [try_asm_modules=$enableval], [try_asm_modules=yes]) AC_MSG_RESULT($try_asm_modules) # Implementation of the --enable-m-guard switch. AC_MSG_CHECKING([whether memory guard is requested]) AC_ARG_ENABLE(m-guard, AC_HELP_STRING([--enable-m-guard], [Enable memory guard facility]), [use_m_guard=$enableval], [use_m_guard=no]) AC_MSG_RESULT($use_m_guard) if test "$use_m_guard" = yes ; then AC_DEFINE(M_GUARD,1,[Define to use the (obsolete) malloc guarding feature]) fi # Implementation of the --enable-large-data-tests switch. 
AC_MSG_CHECKING([whether to run large data tests])
# (Typo fix in the help text: "ruinning" -> "running".)
AC_ARG_ENABLE(large-data-tests,
              AC_HELP_STRING([--enable-large-data-tests],
                             [Enable the real long running large data tests]),
              large_data_tests=$enableval,large_data_tests=no)
AC_MSG_RESULT($large_data_tests)
AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests)

# Implementation of the --with-capabilities switch.
# Check whether we want to use Linux capabilities
AC_MSG_CHECKING([whether use of capabilities is requested])
AC_ARG_WITH(capabilities,
            AC_HELP_STRING([--with-capabilities],
                           [Use linux capabilities [default=no]]),
            [use_capabilities="$withval"],[use_capabilities=no])
AC_MSG_RESULT($use_capabilities)

# Implementation of the --enable-hmac-binary-check.
AC_MSG_CHECKING([whether a HMAC binary check is requested])
AC_ARG_ENABLE(hmac-binary-check,
              AC_HELP_STRING([--enable-hmac-binary-check],
                             [Enable library integrity check]),
              [use_hmac_binary_check=$enableval],
              [use_hmac_binary_check=no])
AC_MSG_RESULT($use_hmac_binary_check)
if test "$use_hmac_binary_check" = yes ; then
    AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1,
              [Define to support an HMAC based integrity check])
fi

# Implementation of the --disable-jent-support switch.
AC_MSG_CHECKING([whether jitter entropy support is requested])
AC_ARG_ENABLE(jent-support,
              AC_HELP_STRING([--disable-jent-support],
                             [Disable support for the Jitter entropy collector]),
              jentsupport=$enableval,jentsupport=yes)
AC_MSG_RESULT($jentsupport)

# Implementation of the --disable-padlock-support switch.
AC_MSG_CHECKING([whether padlock support is requested])
AC_ARG_ENABLE(padlock-support,
              AC_HELP_STRING([--disable-padlock-support],
                             [Disable support for the PadLock Engine of VIA processors]),
              padlocksupport=$enableval,padlocksupport=yes)
AC_MSG_RESULT($padlocksupport)

# Implementation of the --disable-aesni-support switch.
AC_MSG_CHECKING([whether AESNI support is requested]) AC_ARG_ENABLE(aesni-support, AC_HELP_STRING([--disable-aesni-support], [Disable support for the Intel AES-NI instructions]), aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) # Implementation of the --disable-pclmul-support switch. AC_MSG_CHECKING([whether PCLMUL support is requested]) AC_ARG_ENABLE(pclmul-support, AC_HELP_STRING([--disable-pclmul-support], [Disable support for the Intel PCLMUL instructions]), pclmulsupport=$enableval,pclmulsupport=yes) AC_MSG_RESULT($pclmulsupport) # Implementation of the --disable-sse41-support switch. AC_MSG_CHECKING([whether SSE4.1 support is requested]) AC_ARG_ENABLE(sse41-support, AC_HELP_STRING([--disable-sse41-support], [Disable support for the Intel SSE4.1 instructions]), sse41support=$enableval,sse41support=yes) AC_MSG_RESULT($sse41support) # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, AC_HELP_STRING([--disable-drng-support], [Disable support for the Intel DRNG (RDRAND instruction)]), drngsupport=$enableval,drngsupport=yes) AC_MSG_RESULT($drngsupport) # Implementation of the --disable-avx-support switch. AC_MSG_CHECKING([whether AVX support is requested]) AC_ARG_ENABLE(avx-support, AC_HELP_STRING([--disable-avx-support], [Disable support for the Intel AVX instructions]), avxsupport=$enableval,avxsupport=yes) AC_MSG_RESULT($avxsupport) # Implementation of the --disable-avx2-support switch. AC_MSG_CHECKING([whether AVX2 support is requested]) AC_ARG_ENABLE(avx2-support, AC_HELP_STRING([--disable-avx2-support], [Disable support for the Intel AVX2 instructions]), avx2support=$enableval,avx2support=yes) AC_MSG_RESULT($avx2support) # Implementation of the --disable-neon-support switch. 
AC_MSG_CHECKING([whether NEON support is requested]) AC_ARG_ENABLE(neon-support, AC_HELP_STRING([--disable-neon-support], [Disable support for the ARM NEON instructions]), neonsupport=$enableval,neonsupport=yes) AC_MSG_RESULT($neonsupport) # Implementation of the --disable-arm-crypto-support switch. AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested]) AC_ARG_ENABLE(arm-crypto-support, AC_HELP_STRING([--disable-arm-crypto-support], [Disable support for the ARMv8 Crypto Extension instructions]), armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], AC_HELP_STRING([--disable-O-flag-munging], [Disable modification of the cc -O flag]), [enable_o_flag_munging=$enableval], [enable_o_flag_munging=yes]) AC_MSG_RESULT($enable_o_flag_munging) AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes") # Implementation of the --disable-amd64-as-feature-detection switch. AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection]) AC_ARG_ENABLE(amd64-as-feature-detection, AC_HELP_STRING([--disable-amd64-as-feature-detection], [Disable the auto-detection of AMD64 as(1) features]), amd64_as_feature_detection=$enableval, amd64_as_feature_detection=yes) AC_MSG_RESULT($amd64_as_feature_detection) AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME", [A human readable text with the name of the OS]) # For some systems we know that we have ld_version scripts. # Use it then as default. have_ld_version_script=no case "${host}" in *-*-linux*) have_ld_version_script=yes ;; *-*-gnu*) have_ld_version_script=yes ;; esac AC_ARG_ENABLE([ld-version-script], AC_HELP_STRING([--enable-ld-version-script], [enable/disable use of linker version script. 
(default is system dependent)]), [have_ld_version_script=$enableval], [ : ] ) AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes") AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM", [defined to the name of the strong random device]) AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM", [defined to the name of the weaker random device]) ############################### #### Checks for libraries. #### ############################### # # gpg-error is required. # AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION") if test "x$GPG_ERROR_LIBS" = "x"; then AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .]) fi AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT, [The default error source for libgcrypt.]) # # Check whether the GNU Pth library is available. We require this # to build the optional gcryptrnd program. # AC_ARG_WITH(pth-prefix, AC_HELP_STRING([--with-pth-prefix=PFX], [prefix where GNU Pth is installed (optional)]), pth_config_prefix="$withval", pth_config_prefix="") if test x$pth_config_prefix != x ; then PTH_CONFIG="$pth_config_prefix/bin/pth-config" fi if test "$use_random_daemon" = "yes"; then AC_PATH_PROG(PTH_CONFIG, pth-config, no) if test "$PTH_CONFIG" = "no"; then AC_MSG_WARN([[ *** *** To build the Libgcrypt's random number daemon *** we need the support of the GNU Portable Threads Library. 
*** Download it from ftp://ftp.gnu.org/gnu/pth/ *** On a Debian GNU/Linux system you might want to try *** apt-get install libpth-dev ***]]) else GNUPG_PTH_VERSION_CHECK([1.3.7]) if test $have_pth = yes; then PTH_CFLAGS=`$PTH_CONFIG --cflags` PTH_LIBS=`$PTH_CONFIG --ldflags` PTH_LIBS="$PTH_LIBS `$PTH_CONFIG --libs --all`" AC_DEFINE(USE_GNU_PTH, 1, [Defined if the GNU Portable Thread Library should be used]) AC_DEFINE(HAVE_PTH, 1, [Defined if the GNU Pth is available]) fi fi fi AC_SUBST(PTH_CFLAGS) AC_SUBST(PTH_LIBS) # # Check whether pthreads is available # if test "$have_w32_system" != yes; then AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes) if test "$have_pthread" = yes; then AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.]) fi fi # Solaris needs -lsocket and -lnsl. Unisys system includes # gethostbyname in libsocket but needs libnsl for socket. AC_SEARCH_LIBS(setsockopt, [socket], , [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])]) AC_SEARCH_LIBS(setsockopt, [nsl]) ################################## #### Checks for header files. #### ################################## AC_HEADER_STDC AC_CHECK_HEADERS(unistd.h sys/select.h sys/msg.h) INSERT_SYS_SELECT_H= if test x"$ac_cv_header_sys_select_h" = xyes; then INSERT_SYS_SELECT_H=" include " fi AC_SUBST(INSERT_SYS_SELECT_H) ########################################## #### Checks for typedefs, structures, #### #### and compiler characteristics. #### ########################################## AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T AC_TYPE_SIGNAL AC_DECL_SYS_SIGLIST AC_TYPE_PID_T GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF) GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF) GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF) GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF) GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF) gl_TYPE_SOCKLEN_T case "${host}" in *-*-mingw32*) # socklen_t may or may not be defined depending on what headers # are included. To be safe we use int as this is the actual type. 
FALLBACK_SOCKLEN_T="typedef int gcry_socklen_t;" ;; *) if test ".$gl_cv_socklen_t_equiv" = "."; then FALLBACK_SOCKLEN_T="typedef socklen_t gcry_socklen_t;" else FALLBACK_SOCKLEN_T="typedef ${gl_cv_socklen_t_equiv} gcry_socklen_t;" fi esac AC_SUBST(FALLBACK_SOCKLEN_T) # # Check for __builtin_bswap32 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap32, [gcry_cv_have_builtin_bswap32], [gcry_cv_have_builtin_bswap32=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [int x = 0; int y = __builtin_bswap32(x); return y;])], [gcry_cv_have_builtin_bswap32=yes])]) if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP32,1, [Defined if compiler has '__builtin_bswap32' intrinsic]) fi # # Check for __builtin_bswap64 intrinsic. # AC_CACHE_CHECK(for __builtin_bswap64, [gcry_cv_have_builtin_bswap64], [gcry_cv_have_builtin_bswap64=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [long long x = 0; long long y = __builtin_bswap64(x); return y;])], [gcry_cv_have_builtin_bswap64=yes])]) if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_BSWAP64,1, [Defined if compiler has '__builtin_bswap64' intrinsic]) fi # # Check for __builtin_ctz intrinsic. # AC_CACHE_CHECK(for __builtin_ctz, [gcry_cv_have_builtin_ctz], [gcry_cv_have_builtin_ctz=no AC_LINK_IFELSE([AC_LANG_PROGRAM([], [unsigned int x = 0; int y = __builtin_ctz(x); return y;])], [gcry_cv_have_builtin_ctz=yes])]) if test "$gcry_cv_have_builtin_ctz" = "yes" ; then AC_DEFINE(HAVE_BUILTIN_CTZ, 1, [Defined if compiler has '__builtin_ctz' intrinsic]) fi # # Check for VLA support (variable length arrays). # AC_CACHE_CHECK(whether the variable length arrays are supported, [gcry_cv_have_vla], [gcry_cv_have_vla=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void f1(char *, int); char foo(int i) { char b[(i < 0 ? 
0 : i) + 1]; f1(b, sizeof b); return b[0];}]])], [gcry_cv_have_vla=yes])]) if test "$gcry_cv_have_vla" = "yes" ; then AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported]) fi # # Check for ELF visibility support. # AC_CACHE_CHECK(whether the visibility attribute is supported, gcry_cv_visibility_attribute, [gcry_cv_visibility_attribute=no AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo __attribute__ ((visibility ("hidden"))) = 1; int bar __attribute__ ((visibility ("protected"))) = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then gcry_cv_visibility_attribute=yes fi fi fi ]) if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken visibility attribute, gcry_cv_broken_visibility_attribute, [gcry_cv_broken_visibility_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[int foo (int x); int bar (int x) __asm__ ("foo") __attribute__ ((visibility ("hidden"))); int bar (int x) { return x; } ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1; then gcry_cv_broken_visibility_attribute=no fi fi ]) fi if test "$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(for broken alias attribute, gcry_cv_broken_alias_attribute, [gcry_cv_broken_alias_attribute=yes AC_LANG_CONFTEST([AC_LANG_SOURCE( [[extern int foo (int x) __asm ("xyzzy"); int bar (int x) { return x; } extern __typeof (bar) foo __attribute ((weak, alias ("bar"))); extern int dfoo; extern __typeof (dfoo) dfoo __asm ("abccb"); int dfoo = 1; ]])]) if ${CC-cc} -Werror -S conftest.c -o conftest.s \ 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \ grep 'abccb' conftest.s >/dev/null 2>&1; then gcry_cv_broken_alias_attribute=no fi fi ]) fi if test 
"$gcry_cv_visibility_attribute" = "yes"; then AC_CACHE_CHECK(if gcc supports -fvisibility=hidden, gcry_cv_gcc_has_f_visibility, [gcry_cv_gcc_has_f_visibility=no _gcc_cflags_save=$CFLAGS CFLAGS="-fvisibility=hidden" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])], gcry_cv_gcc_has_f_visibility=yes) CFLAGS=$_gcc_cflags_save; ]) fi if test "$gcry_cv_visibility_attribute" = "yes" \ && test "$gcry_cv_broken_visibility_attribute" != "yes" \ && test "$gcry_cv_broken_alias_attribute" != "yes" \ && test "$gcry_cv_gcc_has_f_visibility" = "yes" then AC_DEFINE(GCRY_USE_VISIBILITY, 1, [Define to use the GNU C visibility attribute.]) CFLAGS="$CFLAGS -fvisibility=hidden" fi # Following attribute tests depend on warnings to cause compile to fail, # so set -Werror temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether the compiler supports the GCC style aligned attribute # AC_CACHE_CHECK([whether the GCC style aligned attribute is supported], [gcry_cv_gcc_attribute_aligned], [gcry_cv_gcc_attribute_aligned=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct { int a; } foo __attribute__ ((aligned (16)));]])], [gcry_cv_gcc_attribute_aligned=yes])]) if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1, [Defined if a GCC style "__attribute__ ((aligned (n))" is supported]) fi # # Check whether the compiler supports the GCC style packed attribute # AC_CACHE_CHECK([whether the GCC style packed attribute is supported], [gcry_cv_gcc_attribute_packed], [gcry_cv_gcc_attribute_packed=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[struct foolong_s { long b; } __attribute__ ((packed)); struct foo_s { char a; struct foolong_s b; } __attribute__ ((packed)); enum bar { FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))), };]])], [gcry_cv_gcc_attribute_packed=yes])]) if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, [Defined if a GCC style "__attribute__ ((packed))" is supported]) fi # # Check 
whether the compiler supports the GCC style may_alias attribute # AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported], [gcry_cv_gcc_attribute_may_alias], [gcry_cv_gcc_attribute_may_alias=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[typedef struct foo_s { int a; } __attribute__ ((may_alias)) foo_t;]])], [gcry_cv_gcc_attribute_may_alias=yes])]) if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1, [Defined if a GCC style "__attribute__ ((may_alias))" is supported]) fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # AC_CACHE_CHECK([whether 'asm' assembler keyword is supported], [gcry_cv_have_asm], [gcry_cv_have_asm=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm("":::"memory"); }]])], [gcry_cv_have_asm=yes])]) AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported], [gcry_cv_have___asm__], [gcry_cv_have___asm__=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("":::"memory"); }]])], [gcry_cv_have___asm__=yes])]) if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_DEFINE(asm,__asm__, [Define to supported assembler block keyword, if plain 'asm' was not supported]) fi fi # # Check whether the compiler supports inline assembly memory barrier. 
# if test "$gcry_cv_have_asm" = "no" ; then if test "$gcry_cv_have___asm__" = "yes" ; then AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__ volatile("":::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi else AC_CACHE_CHECK([whether inline assembly memory barrier is supported], [gcry_cv_have_asm_volatile_memory], [gcry_cv_have_asm_volatile_memory=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { asm volatile("":::"memory"); }]])], [gcry_cv_have_asm_volatile_memory=yes])]) fi if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1, [Define if inline asm memory barrier is supported]) fi # # Check whether GCC assembler supports features needed for our ARM # implementations. This needs to be done before setting up the # assembler stuff. # AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], [gcry_cv_gcc_arm_platform_as_ok], [gcry_cv_gcc_arm_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( /* Test if assembler supports UAL syntax. */ ".syntax unified\n\t" ".arm\n\t" /* our assembly code is in ARM mode */ /* Following causes error if assembler ignored '.syntax unified'. */ "asmfunc:\n\t" "add %r0, %r0, %r4, ror #12;\n\t" /* Test if '.type' and '.size' are supported. */ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,%function;\n\t" );]])], [gcry_cv_gcc_arm_platform_as_ok=yes])]) if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARM assembly implementations]) fi # # Check whether GCC assembler supports features needed for our ARMv8/Aarch64 # implementations. This needs to be done before setting up the # assembler stuff. 
# AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations], [gcry_cv_gcc_aarch64_platform_as_ok], [gcry_cv_gcc_aarch64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( "asmfunc:\n\t" "eor x0, x0, x30, ror #12;\n\t" "add x0, x0, x30, asr #12;\n\t" "eor v0.16b, v0.16b, v31.16b;\n\t" /* Test if '.type' and '.size' are supported. */ ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" );]])], [gcry_cv_gcc_aarch64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations]) fi # # Check whether underscores in symbols are required. This needs to be # done before setting up the assembler stuff. # GNUPG_SYS_SYMBOL_UNDERSCORE() ################################# #### #### #### Setup assembler stuff. #### #### Define mpi_cpu_arch. #### #### #### ################################# AC_ARG_ENABLE(mpi-path, AC_HELP_STRING([--enable-mpi-path=EXTRA_PATH], [prepend EXTRA_PATH to list of CPU specific optimizations]), mpi_extra_path="$enableval",mpi_extra_path="") AC_MSG_CHECKING(architecture and mpi assembler functions) if test -f $srcdir/mpi/config.links ; then . 
$srcdir/mpi/config.links AC_CONFIG_LINKS("$mpi_ln_list") ac_cv_mpi_sflags="$mpi_sflags" AC_MSG_RESULT($mpi_cpu_arch) else AC_MSG_RESULT(failed) AC_MSG_ERROR([mpi/config.links missing!]) fi MPI_SFLAGS="$ac_cv_mpi_sflags" AC_SUBST(MPI_SFLAGS) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes) AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes) AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) # Reset non applicable feature flags. 
if test "$mpi_cpu_arch" != "x86" ; then aesnisupport="n/a" pclmulsupport="n/a" sse41support="n/a" avxsupport="n/a" avx2support="n/a" padlocksupport="n/a" jentsupport="n/a" drngsupport="n/a" fi if test "$mpi_cpu_arch" != "arm" ; then if test "$mpi_cpu_arch" != "aarch64" ; then neonsupport="n/a" armcryptosupport="n/a" fi fi ############################################# #### #### #### Platform specific compiler checks. #### #### #### ############################################# # Following tests depend on warnings to cause compile to fail, so set -Werror # temporarily. _gcc_cflags_save=$CFLAGS CFLAGS="$CFLAGS -Werror" # # Check whether compiler supports 'ms_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute], [gcry_cv_gcc_attribute_ms_abi], [gcry_cv_gcc_attribute_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((ms_abi)) proto(int);]])], [gcry_cv_gcc_attribute_ms_abi=yes])]) if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1, [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute]) fi # # Check whether compiler supports 'sysv_abi' function attribute. # AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute], [gcry_cv_gcc_attribute_sysv_abi], [gcry_cv_gcc_attribute_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[int __attribute__ ((sysv_abi)) proto(int);]])], [gcry_cv_gcc_attribute_sysv_abi=yes])]) if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1, [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute]) fi # # Check whether default calling convention is 'ms_abi'. 
# if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], [gcry_cv_gcc_default_abi_is_ms_abi], [gcry_cv_gcc_default_abi_is_ms_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((ms_abi))(*msabi_func)(void); /* warning on SysV abi targets, passes on Windows based targets */ msabi_func = def_func; return msabi_func; }]])], [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, [Defined if default calling convention is 'ms_abi']) fi fi # # Check whether default calling convention is 'sysv_abi'. # if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], [gcry_cv_gcc_default_abi_is_sysv_abi], [gcry_cv_gcc_default_abi_is_sysv_abi=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void *test(void) { void *(*def_func)(void) = test; void *__attribute__((sysv_abi))(*sysvabi_func)(void); /* warning on MS ABI targets, passes on SysV ABI targets */ sysvabi_func = def_func; return sysvabi_func; }]])], [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, [Defined if default calling convention is 'sysv_abi']) fi fi # Restore flags. CFLAGS=$_gcc_cflags_save; # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. 
# AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions], [gcry_cv_gcc_inline_asm_ssse3], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_ssse3="n/a" else gcry_cv_gcc_inline_asm_ssse3=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; void a(void) { __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):); }]])], [gcry_cv_gcc_inline_asm_ssse3=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1, [Defined if inline assembler supports SSSE3 instructions]) fi # # Check whether GCC inline assembler supports PCLMUL instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], [gcry_cv_gcc_inline_asm_pclmul], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_pclmul="n/a" else gcry_cv_gcc_inline_asm_pclmul=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_pclmul=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, [Defined if inline assembler supports PCLMUL instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. 
# AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]])], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); }]])], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else 
gcry_cv_gcc_inline_asm_bmi2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("rorxl \$23, %%eax, %%edx\\n\\t":::"memory"); }]])], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to CPPFLAGS and try check again. # _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # # Check whether GCC assembler supports features needed for our amd64 # implementations # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], [gcry_cv_gcc_amd64_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_amd64_platform_as_ok="n/a" else gcry_cv_gcc_amd64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. 
*/ "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. */ "xorl \$(123456789/12345678), %ebp;\n\t" );]])], [gcry_cv_gcc_amd64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" );]])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".intel_syntax noprefix\n\t" "pxor xmm1, xmm7;\n\t" /* Intel syntax implementation also use GAS macros, so check * for them here. 
*/ "VAL_A = xmm4\n\t" "VAL_B = xmm2\n\t" ".macro SET_VAL_A p1\n\t" " VAL_A = \\\\p1 \n\t" ".endm\n\t" ".macro SET_VAL_B p1\n\t" " VAL_B = \\\\p1 \n\t" ".endm\n\t" "vmovdqa VAL_A, VAL_B;\n\t" "SET_VAL_A eax\n\t" "SET_VAL_B ebp\n\t" "add VAL_A, VAL_B;\n\t" "add VAL_B, 0b10101;\n\t" );]])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. 
*/ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" "vld1.64 {%q0-%q1}, [%r0]!;\n\t" "vrev64.8 %q0, %q3;\n\t" "vadd.u64 %q0, %q1;\n\t" "vadd.s64 %d3, %d2, %d3;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler 
supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], [gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd+crypto\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension 
instructions]) fi ####################################### #### Checks for library functions. #### ####################################### AC_FUNC_VPRINTF # We have replacements for these in src/missing-string.c AC_CHECK_FUNCS(stpcpy strcasecmp) # We have replacements for these in src/g10lib.h AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise) # Other checks AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4) AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog) AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile) GNUPG_CHECK_MLOCK # # Replacement functions. # AC_REPLACE_FUNCS([getpid clock]) # # Check whether it is necessary to link against libdl. # DL_LIBS="" if test "$use_hmac_binary_check" = yes ; then _gcry_save_libs="$LIBS" LIBS="" AC_SEARCH_LIBS(dlopen, c dl,,,) DL_LIBS=$LIBS LIBS="$_gcry_save_libs" LIBGCRYPT_CONFIG_LIBS="${LIBGCRYPT_CONFIG_LIBS} ${DL_LIBS}" fi AC_SUBST(DL_LIBS) # # Check whether we can use Linux capabilities as requested. # if test "$use_capabilities" = "yes" ; then use_capabilities=no AC_CHECK_HEADERS(sys/capability.h) if test "$ac_cv_header_sys_capability_h" = "yes" ; then AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1) if test "$ac_cv_lib_cap_cap_init" = "yes"; then AC_DEFINE(USE_CAPABILITIES,1, [define if capabilities should be used]) LIBS="$LIBS -lcap" use_capabilities=yes fi fi if test "$use_capabilities" = "no" ; then AC_MSG_WARN([[ *** *** The use of capabilities on this system is not possible. *** You need a recent Linux kernel and some patches: *** fcaps-2.2.9-990610.patch (kernel patch for 2.2.9) *** fcap-module-990613.tar.gz (kernel module) *** libcap-1.92.tar.gz (user mode library and utilities) *** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN *** set (filesystems menu). Be warned: This code is *really* ALPHA. ***]]) fi fi # Check whether a random device is available. 
if test "$try_dev_random" = yes ; then AC_CACHE_CHECK(for random device, ac_cv_have_dev_random, [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi]) if test "$ac_cv_have_dev_random" = yes; then AC_DEFINE(HAVE_DEV_RANDOM,1, [defined if the system supports a random device] ) fi else AC_MSG_CHECKING(for random device) ac_cv_have_dev_random=no AC_MSG_RESULT(has been disabled) fi # Figure out the random modules for this configuration. if test "$random" = "default"; then # Select default value. if test "$ac_cv_have_dev_random" = yes; then # Try Linuxish random device. random_modules="linux" else case "${host}" in *-*-mingw32ce*) # WindowsCE random device. random_modules="w32ce" ;; *-*-mingw32*|*-*-cygwin*) # Windows random device. random_modules="w32" ;; *) # Build everything, allow to select at runtime. random_modules="$auto_random_modules" ;; esac fi else if test "$random" = "auto"; then # Build everything, allow to select at runtime. random_modules="$auto_random_modules" else random_modules="$random" fi fi # # Other defines # if test mym4_isgit = "yes"; then AC_DEFINE(IS_DEVELOPMENT_VERSION,1, [Defined if this is not a regular release]) fi AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes) # This is handy for debugging so the compiler doesn't rearrange # things and eliminate variables. AC_ARG_ENABLE(optimization, AC_HELP_STRING([--disable-optimization], [disable compiler optimization]), [if test $enableval = no ; then CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'` fi]) # CFLAGS mangling when using gcc. if test "$GCC" = yes; then CFLAGS="$CFLAGS -Wall" if test "$USE_MAINTAINER_MODE" = "yes"; then CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes" CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security" # If -Wno-missing-field-initializers is supported we can enable a # a bunch of really useful warnings. 
AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wno-missing-field-initializers" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast" CFLAGS="$CFLAGS -Wwrite-strings" CFLAGS="$CFLAGS -Wdeclaration-after-statement" CFLAGS="$CFLAGS -Wno-missing-field-initializers" CFLAGS="$CFLAGS -Wno-sign-compare" fi AC_MSG_CHECKING([if gcc supports -Wpointer-arith]) _gcc_cflags_save=$CFLAGS CFLAGS="-Wpointer-arith" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no) AC_MSG_RESULT($_gcc_wopt) CFLAGS=$_gcc_cflags_save; if test x"$_gcc_wopt" = xyes ; then CFLAGS="$CFLAGS -Wpointer-arith" fi fi fi # Check whether as(1) supports a noeexecstack feature. This test # includes an override option. CL_AS_NOEXECSTACK AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION) AC_SUBST(LIBGCRYPT_CONFIG_LIBS) AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS) AC_SUBST(LIBGCRYPT_CONFIG_HOST) AC_SUBST(LIBGCRYPT_THREAD_MODULES) AC_CONFIG_COMMANDS([gcrypt-conf],[[ chmod +x src/libgcrypt-config ]],[[ prefix=$prefix exec_prefix=$exec_prefix libdir=$libdir datadir=$datadir DATADIRNAME=$DATADIRNAME ]]) ##################### #### Conclusion. #### ##################### # Check that requested feature can actually be used and define # ENABLE_foo_SUPPORT macros. 
# Downgrade each requested CPU-feature flag to "no (unsupported by
# compiler)" when the corresponding compiler/assembler probe above failed.
if test x"$aesnisupport" = xyes ; then
  # The AES-NI implementation requires SSSE3 inline-assembly support.
  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
    aesnisupport="no (unsupported by compiler)"
  fi
fi
if test x"$pclmulsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
    pclmulsupport="no (unsupported by compiler)"
  fi
fi
if test x"$sse41support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
    sse41support="no (unsupported by compiler)"
  fi
fi
if test x"$avxsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
    avxsupport="no (unsupported by compiler)"
  fi
fi
if test x"$avx2support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
    avx2support="no (unsupported by compiler)"
  fi
fi
if test x"$neonsupport" = xyes ; then
  # NEON is usable if either the AArch32 or the AArch64 NEON probe passed.
  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
      neonsupport="no (unsupported by compiler)"
    fi
  fi
fi
if test x"$armcryptosupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
      # Bug fix: this branch previously assigned to "neonsupport"
      # (copy-paste from the NEON check above), which both disabled NEON
      # spuriously and left ARM Crypto Extension support enabled on
      # compilers whose assembler cannot handle those instructions.
      armcryptosupport="no (unsupported by compiler)"
    fi
  fi
fi

if test x"$aesnisupport" = xyes ; then
  AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
            [Enable support for Intel AES-NI instructions.])
fi
if test x"$pclmulsupport" = xyes ; then
  AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
            [Enable support for Intel PCLMUL instructions.])
fi
if test x"$sse41support" = xyes ; then
  AC_DEFINE(ENABLE_SSE41_SUPPORT, 1,
            [Enable support for Intel SSE4.1 instructions.])
fi
if test x"$avxsupport" = xyes ; then
  AC_DEFINE(ENABLE_AVX_SUPPORT,1,
            [Enable support for Intel AVX instructions.])
fi
if test x"$avx2support" = xyes ; then
  AC_DEFINE(ENABLE_AVX2_SUPPORT,1,
            [Enable support for Intel AVX2 instructions.])
fi
if test x"$neonsupport" = xyes ; then
  AC_DEFINE(ENABLE_NEON_SUPPORT,1,
            [Enable support for ARM NEON instructions.])
fi
if test x"$armcryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
            [Enable support for ARMv8 Crypto Extension instructions.])
fi
if test x"$jentsupport" = xyes ; then
  AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
            [Enable support for the jitter entropy collector.])
fi
if test x"$padlocksupport" = xyes ; then
  AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1,
            [Enable support for the PadLock engine.])
fi
if test x"$drngsupport" = xyes ; then
  AC_DEFINE(ENABLE_DRNG_SUPPORT, 1,
            [Enable support for Intel DRNG (RDRAND instruction).])
fi


# Define conditional sources and config.h symbols depending on the
# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.

LIST_MEMBER(arcfour, $enabled_ciphers)
if test "$found" = "1"; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
   AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blowfish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
   AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(cast5, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
   AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(des, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
   AC_DEFINE(USE_DES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS des-amd64.lo"
      ;;
   esac
fi
LIST_MEMBER(aes, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo" AC_DEFINE(USE_AES, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-amd64.lo" # Build with the SSSE3 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64-asm.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-arm.lo" # Build with the ARMv8/AArch32 CE implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aarch64.lo" # Build with the ARMv8/AArch64 CE implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; esac case "$mpi_cpu_arch" in x86) # Build with the AES-NI implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aesni.lo" # Build with the Padlock implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-padlock.lo" ;; esac fi LIST_MEMBER(twofish, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo" AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-amd64.lo" if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-avx2-amd64.lo" fi ;; arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-aarch64.lo" ;; esac fi LIST_MEMBER(serpent, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS 
serpent.lo" AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the SSE2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-sse2-amd64.lo" ;; esac if test x"$avx2support" = xyes ; then # Build with the AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-avx2-amd64.lo" fi if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-armv7-neon.lo" fi fi LIST_MEMBER(rfc2268, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo" AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included]) fi LIST_MEMBER(seed, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo" AC_DEFINE(USE_SEED, 1, [Defined if this module should be included]) fi LIST_MEMBER(camellia, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo" AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included]) case "${host}" in arm*-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-arm.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aarch64.lo" ;; esac if test x"$avxsupport" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx-amd64.lo" fi fi if test x"$avx2support" = xyes ; then if test x"$aesnisupport" = xyes ; then # Build with the AES-NI/AVX2 implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx2-amd64.lo" fi fi fi LIST_MEMBER(idea, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo" AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included]) fi LIST_MEMBER(salsa20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo" AC_DEFINE(USE_SALSA20, 1, [Defined if this 
module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-amd64.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-armv7-neon.lo" fi fi LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo" AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included]) fi LIST_MEMBER(chacha20, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo" AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-sse2-amd64.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-ssse3-amd64.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-avx2-amd64.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-armv7-neon.lo" fi fi -case "${host}" in - x86_64-*-*) - # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS poly1305-sse2-amd64.lo" - GCRYPT_CIPHERS="$GCRYPT_CIPHERS poly1305-avx2-amd64.lo" - ;; -esac - -if test x"$neonsupport" = xyes ; then - # Build with the NEON implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS poly1305-armv7-neon.lo" -fi - LIST_MEMBER(dsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo" AC_DEFINE(USE_DSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(rsa, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo" AC_DEFINE(USE_RSA, 1, [Defined if this module should be included]) fi LIST_MEMBER(elgamal, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo" AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be 
included]) fi LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi LIST_MEMBER(crc, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo" AC_DEFINE(USE_CRC, 1, [Defined if this module should be included]) case "${host}" in i?86-*-* | x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo" ;; esac fi LIST_MEMBER(gostr3411-94, $enabled_digests) if test "$found" = "1" ; then # GOST R 34.11-94 internally uses GOST 28147-89 LIST_MEMBER(gost28147, $enabled_ciphers) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo" AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included]) fi fi LIST_MEMBER(stribog, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo" AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included]) fi LIST_MEMBER(md2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo" AC_DEFINE(USE_MD2, 1, [Defined if this module should be included]) fi LIST_MEMBER(md4, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo" AC_DEFINE(USE_MD4, 1, [Defined if this module should be included]) fi LIST_MEMBER(md5, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo" AC_DEFINE(USE_MD5, 1, [Defined if this module should be included]) fi LIST_MEMBER(rmd160, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo" AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included]) fi LIST_MEMBER(sha256, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo" AC_DEFINE(USE_SHA256, 1, [Defined if this module should be 
included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch64-ce.lo" ;; esac fi LIST_MEMBER(sha512, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo" AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx2-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-arm.lo" ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-armv7-neon.lo" fi fi LIST_MEMBER(sha3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo" AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation : ;; esac if test x"$neonsupport" = xyes ; then # Build with the NEON implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak-armv7-neon.lo" fi fi LIST_MEMBER(tiger, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo" AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included]) fi LIST_MEMBER(whirlpool, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo" AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation 
GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool-sse2-amd64.lo" ;; esac fi LIST_MEMBER(blake2, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo" AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included]) fi # SHA-1 needs to be included always for example because it is used by # random-csprng.c. GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo" AC_DEFINE(USE_SHA1, 1, [Defined if this module should be included]) case "${host}" in x86_64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-ssse3-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-amd64.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-bmi2-amd64.lo" ;; arm*-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv7-neon.lo" GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch32-ce.lo" ;; aarch64-*-*) # Build with the assembly implementation GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch64-ce.lo" ;; esac LIST_MEMBER(sm3, $enabled_digests) if test "$found" = "1" ; then GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo" AC_DEFINE(USE_SM3, 1, [Defined if this module should be included]) fi LIST_MEMBER(scrypt, $enabled_kdfs) if test "$found" = "1" ; then GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo" AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included]) fi LIST_MEMBER(linux, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndlinux.lo" AC_DEFINE(USE_RNDLINUX, 1, [Defined if the /dev/random RNG should be used.]) fi LIST_MEMBER(unix, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo" AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.]) fi LIST_MEMBER(egd, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo" AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.]) fi LIST_MEMBER(w32, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo" 
AC_DEFINE(USE_RNDW32, 1, [Defined if the Windows specific RNG should be used.]) fi LIST_MEMBER(w32ce, $random_modules) if test "$found" = "1" ; then GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo" AC_DEFINE(USE_RNDW32CE, 1, [Defined if the WindowsCE specific RNG should be used.]) fi AC_SUBST([GCRYPT_CIPHERS]) AC_SUBST([GCRYPT_PUBKEY_CIPHERS]) AC_SUBST([GCRYPT_DIGESTS]) AC_SUBST([GCRYPT_KDFS]) AC_SUBST([GCRYPT_RANDOM]) AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers) AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers) AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests) # For printing the configuration we need a colon separated list of # algorithm names. tmp=`echo "$enabled_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp", [List of available cipher algorithms]) tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp", [List of available public key cipher algorithms]) tmp=`echo "$enabled_digests" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp", [List of available digest algorithms]) tmp=`echo "$enabled_kdfs" | tr ' ' : ` AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp", [List of available KDF algorithms]) # # Define conditional sources depending on the used hardware platform. # Note that all possible modules must also be listed in # src/Makefile.am (EXTRA_libgcrypt_la_SOURCES). 
# GCRYPT_HWF_MODULES= case "$mpi_cpu_arch" in x86) AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms]) GCRYPT_HWF_MODULES="hwf-x86.lo" ;; alpha) AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms]) ;; sparc) AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms]) ;; mips) AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms]) ;; m68k) AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms]) ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) GCRYPT_HWF_MODULES="hwf-arm.lo" ;; aarch64) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms]) GCRYPT_HWF_MODULES="hwf-arm.lo" ;; esac AC_SUBST([GCRYPT_HWF_MODULES]) # # Option to disable building of doc file # build_doc=yes AC_ARG_ENABLE([doc], AC_HELP_STRING([--disable-doc], [do not build the documentation]), build_doc=$enableval, build_doc=yes) AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno]) # # Provide information about the build. # BUILD_REVISION="mym4_revision" AC_SUBST(BUILD_REVISION) AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION", [GIT commit id revision used to build this package]) changequote(,)dnl BUILD_FILEVERSION=`echo "$VERSION" | sed 's/\([0-9.]*\).*/\1./;s/\./,/g'` changequote([,])dnl BUILD_FILEVERSION="${BUILD_FILEVERSION}mym4_revision_dec" AC_SUBST(BUILD_FILEVERSION) AC_ARG_ENABLE([build-timestamp], AC_HELP_STRING([--enable-build-timestamp], [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]), [if test "$enableval" = "yes"; then BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date` else BUILD_TIMESTAMP="$enableval" fi], [BUILD_TIMESTAMP=""]) AC_SUBST(BUILD_TIMESTAMP) AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP", [The time this package was configured for a build]) # And create the files. 
AC_CONFIG_FILES([ Makefile m4/Makefile compat/Makefile mpi/Makefile cipher/Makefile random/Makefile doc/Makefile src/Makefile src/gcrypt.h src/libgcrypt-config src/versioninfo.rc tests/Makefile ]) AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g]) AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf]) AC_OUTPUT detection_module="${GCRYPT_HWF_MODULES%.lo}" test -n "$detection_module" || detection_module="none" # Give some feedback GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:]) GCRY_MSG_SHOW([],[]) GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)]) GCRY_MSG_SHOW([Hardware detection module:],[$detection_module]) GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers]) GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests]) GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs]) GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers]) GCRY_MSG_SHOW([Random number generator: ],[$random]) GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport]) GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities]) GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport]) GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport]) GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport]) GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support]) GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport]) GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then cat <