diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h new file mode 100644 index 00000000..814b7ad1 --- /dev/null +++ b/cipher/asm-common-aarch64.h @@ -0,0 +1,32 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include <config.h> + +#ifdef __ELF__ /* ELF(...) keeps its arguments on ELF targets and drops them elsewhere; it wraps ELF-only directives such as .type/.size */ +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S index 68d2a7d4..c3cc463d 100644 --- a/cipher/camellia-aarch64.S +++ b/cipher/camellia-aarch64.S @@ -1,557 +1,562 @@ /* camellia-aarch64.S - ARMv8/AArch64 assembly implementation of Camellia * cipher * * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS .text /* struct camellia_ctx: */ #define key_table 0 /* register macros */ #define CTX x0 #define RDST x1 #define RSRC x2 #define RKEYBITS w3 #define RTAB1 x4 #define RTAB2 x5 #define RTAB3 x6 #define RTAB4 x7 #define RMASK w8 #define IL w9 #define IR w10 #define xIL x9 #define xIR x10 #define XL w11 #define XR w12 #define YL w13 #define YR w14 #define RT0 w15 #define RT1 w16 #define RT2 w17 -#define RT3 w18 +#define RT3 w19 /* was w18; presumably avoided because x18 is a reserved platform register on some AArch64 ABIs - x19 is saved/restored by the entry points */ #define xRT0 x15 #define xRT1 x16 #define xRT2 x17 -#define xRT3 x18 +#define xRT3 x19 #ifdef __AARCH64EL__ #define host_to_be(reg, rtmp) \ rev reg, reg; #define be_to_host(reg, rtmp) \ rev reg, reg; #else /* nop on big-endian */ #define host_to_be(reg, rtmp) /*_*/ #define be_to_host(reg, rtmp) /*_*/ #endif #define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \ ldr a, [rin, #0]; \ ldr b, [rin, #4]; \ be_to_host(a, rtmp); \ ldr c, [rin, #8]; \ be_to_host(b, rtmp); \ ldr d, [rin, #12]; \ be_to_host(c, rtmp); \ be_to_host(d, rtmp); #define str_output_aligned_be(rout, a, b, c, d, rtmp) \ be_to_host(a, rtmp); \ be_to_host(b, rtmp); \ str a, [rout, #0]; \ be_to_host(c, rtmp); \ str b, [rout, #4]; \ be_to_host(d, rtmp); \ str c, [rout, #8]; \ str d, [rout, #12]; /* unaligned word reads/writes allowed */ #define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp) #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0) /********************************************************************** 1-way camellia **********************************************************************/ #define roundsm(xl, xr, kl, kr, yl, yr) \ ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \ and IR, 
RMASK, xr, lsl#(4); /*sp1110*/ \ ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \ and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \ and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \ ldr IR, [RTAB1, xIR]; \ and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \ eor yl, yl, RT2; \ ldr IL, [RTAB1, xIL]; \ eor yr, yr, RT3; \ \ ldr RT0, [RTAB3, xRT0]; \ ldr RT1, [RTAB3, xRT1]; \ \ and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \ and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \ \ eor IR, IR, RT0; \ eor IL, IL, RT1; \ \ ldr RT2, [RTAB2, xRT2]; \ and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \ ldr RT3, [RTAB2, xRT3]; \ and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \ \ ldr RT0, [RTAB4, xRT0]; \ ldr RT1, [RTAB4, xRT1]; \ \ eor IR, IR, RT2; \ eor IL, IL, RT3; \ eor IR, IR, RT0; \ eor IL, IL, RT1; \ \ eor IR, IR, IL; \ eor yr, yr, IL, ror#8; \ eor yl, yl, IR; \ eor yr, yr, IR; #define enc_rounds(n) \ roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \ roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \ roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR); #define dec_rounds(n) \ roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \ roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \ roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \ roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR); /* perform FL and FL⁻¹ */ #define fls(ll, lr, rl, rr, kll, klr, krl, krr) \ ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \ ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \ and RT0, RT0, ll; \ ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \ orr RT2, RT2, rr; \ ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \ eor rl, rl, RT2; \ eor lr, 
lr, RT0, ror#31; \ and RT3, RT3, rl; \ orr RT1, RT1, lr; \ eor ll, ll, RT1; \ eor rr, rr, RT3, ror#31; #define enc_fls(n) \ fls(XL, XR, YL, YR, \ (n) * 2 + 0, (n) * 2 + 1, \ (n) * 2 + 2, (n) * 2 + 3); #define dec_fls(n) \ fls(XL, XR, YL, YR, \ (n) * 2 + 2, (n) * 2 + 3, \ (n) * 2 + 0, (n) * 2 + 1); #define inpack(n) \ ldr_input_be(RSRC, XL, XR, YL, YR, RT0); \ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ eor XL, XL, RT0; \ eor XR, XR, RT1; #define outunpack(n) \ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ eor YL, YL, RT0; \ eor YR, YR, RT1; \ str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); .globl _gcry_camellia_arm_encrypt_block -.type _gcry_camellia_arm_encrypt_block,@function; +ELF(.type _gcry_camellia_arm_encrypt_block,@function;) _gcry_camellia_arm_encrypt_block: + stp x19, x30, [sp, #-16]! /* input: * x0: keytable * x1: dst * x2: src * x3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; mov RMASK, #(0xff<<4); /* byte mask */ add RTAB2, RTAB1, #(1 * 4); add RTAB3, RTAB1, #(2 * 4); add RTAB4, RTAB1, #(3 * 4); inpack(0); enc_rounds(0); enc_fls(8); enc_rounds(8); enc_fls(16); enc_rounds(16); cmp RKEYBITS, #(16 * 8); bne .Lenc_256; outunpack(24); + ldp x19, x30, [sp], #16 ret; .ltorg .Lenc_256: enc_fls(24); enc_rounds(24); outunpack(32); + ldp x19, x30, [sp], #16 ret; .ltorg -.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) .globl _gcry_camellia_arm_decrypt_block -.type _gcry_camellia_arm_decrypt_block,@function; +ELF(.type _gcry_camellia_arm_decrypt_block,@function;) _gcry_camellia_arm_decrypt_block: + stp x19, x30, [sp, #-16]! 
/* input: * x0: keytable * x1: dst * x2: src * x3: keybitlen */ adr RTAB1, _gcry_camellia_arm_tables; mov RMASK, #(0xff<<4); /* byte mask */ add RTAB2, RTAB1, #(1 * 4); add RTAB3, RTAB1, #(2 * 4); add RTAB4, RTAB1, #(3 * 4); cmp RKEYBITS, #(16 * 8); bne .Ldec_256; inpack(24); .Ldec_128: dec_rounds(16); dec_fls(16); dec_rounds(8); dec_fls(8); dec_rounds(0); outunpack(0); + ldp x19, x30, [sp], #16 ret; .ltorg .Ldec_256: inpack(32); dec_rounds(24); dec_fls(24); b .Ldec_128; .ltorg -.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) /* Encryption/Decryption tables */ .globl _gcry_camellia_arm_tables -.type _gcry_camellia_arm_tables,@object; +ELF(.type _gcry_camellia_arm_tables,@object;) .balign 32 _gcry_camellia_arm_tables: .Lcamellia_sp1110: .long 0x70707000 .Lcamellia_sp0222: .long 0x00e0e0e0 .Lcamellia_sp3033: .long 0x38003838 .Lcamellia_sp4404: .long 0x70700070 .long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c .long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3 .long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0 .long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4 .long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057 .long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea .long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae .long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023 .long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b .long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045 .long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5 .long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed .long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f .long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d .long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092 .long 0x23232300, 0x00464646, 0x91009191, 0x86860086 .long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af .long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c .long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f .long 
0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e .long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc .long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e .long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b .long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6 .long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039 .long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5 .long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d .long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9 .long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a .long 0x92929200, 0x00252525, 0x49004949, 0x51510051 .long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c .long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b .long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a .long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb .long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0 .long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074 .long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b .long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0 .long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084 .long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df .long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb .long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034 .long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076 .long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d .long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9 .long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1 .long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004 .long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014 .long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a .long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de .long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011 .long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032 .long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c .long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053 .long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2 .long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe .long 0x01010100, 0x00020202, 0x80008080, 
0xcfcf00cf .long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3 .long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a .long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024 .long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8 .long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060 .long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069 .long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa .long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0 .long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1 .long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062 .long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054 .long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e .long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0 .long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064 .long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010 .long 0x12121200, 0x00242424, 0x09000909, 0x00000000 .long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3 .long 0x20202000, 0x00404040, 0x10001010, 0x75750075 .long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a .long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6 .long 0x84848400, 0x00090909, 0x42004242, 0x09090009 .long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd .long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087 .long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083 .long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd .long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090 .long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073 .long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6 .long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d .long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf .long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052 .long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8 .long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8 .long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6 .long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081 .long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f .long 0x04040400, 0x00080808, 0x02000202, 0x13130013 .long 0xd7d7d700, 
0x00afafaf, 0xeb00ebeb, 0x63630063 .long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9 .long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7 .long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f .long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc .long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029 .long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9 .long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f .long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4 .long 0x32323200, 0x00646464, 0x19001919, 0x78780078 .long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006 .long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7 .long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071 .long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4 .long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab .long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088 .long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d .long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072 .long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9 .long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8 .long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac .long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036 .long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a .long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c .long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1 .long 0x24242400, 0x00484848, 0x12001212, 0x40400040 .long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3 .long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb .long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043 .long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015 .long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad .long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077 .long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080 .long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082 .long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec .long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027 .long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5 .long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085 
.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035 .long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c .long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041 .long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef .long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093 .long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019 .long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021 .long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e .long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e .long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065 .long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd .long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8 .long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f .long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb .long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce .long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030 .long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f .long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5 .long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a .long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1 .long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca .long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047 .long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d .long 0x09090900, 0x00121212, 0x84008484, 0x01010001 .long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6 .long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056 .long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d .long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d .long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066 .long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc .long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d .long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012 .long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020 .long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1 .long 0x33333300, 0x00666666, 0x99009999, 0x99990099 .long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c .long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2 .long 0xf6f6f600, 0x00ededed, 
0x7b007b7b, 0x7e7e007e .long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005 .long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7 .long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031 .long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017 .long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7 .long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058 .long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061 .long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b .long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c .long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f .long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016 .long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018 .long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022 .long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044 .long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2 .long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5 .long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091 .long 0x13131300, 0x00262626, 0x89008989, 0x08080008 .long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8 .long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc .long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050 .long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0 .long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d .long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089 .long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097 .long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b .long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095 .long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff .long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2 .long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4 .long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048 .long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7 .long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db .long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003 .long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da .long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f .long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094 .long 
0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c .long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002 .long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a .long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033 .long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067 .long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3 .long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f .long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2 .long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b .long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026 .long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037 .long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b .long 0x88888800, 0x00111111, 0x44004444, 0x96960096 .long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b .long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be .long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e .long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079 .long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c .long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e .long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e .long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5 .long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6 .long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd .long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059 .long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098 .long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a .long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046 .long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba .long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025 .long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042 .long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2 .long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa .long 0x40404000, 0x00808080, 0x20002020, 0x07070007 .long 0x28282800, 0x00505050, 0x14001414, 0x55550055 .long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee .long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a .long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049 .long 0xc9c9c900, 0x00939393, 0xe400e4e4, 
0x68680068 .long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038 .long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4 .long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028 .long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b .long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9 .long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1 .long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3 .long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 .long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 .long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e -.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables; +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S index 5990a084..3844d4e1 100644 --- a/cipher/chacha20-aarch64.S +++ b/cipher/chacha20-aarch64.S @@ -1,308 +1,314 @@ /* chacha20-aarch64.S - ARMv8/AArch64 accelerated chacha20 blocks function * * Copyright (C) 2017,2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * Based on D. J. Bernstein reference implementation at * http://cr.yp.to/chacha.html: * * chacha-regs.c version 20080118 * D. J. Bernstein * Public domain. 
*/ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \ defined(USE_CHACHA20) .cpu generic+simd .text +#ifdef _WIN32 /* Windows: form the symbol address directly (adrp + :lo12:) instead of loading it through the GOT */ +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* register macros */ #define INPUT x0 #define DST x1 #define SRC x2 #define NBLKS x3 #define ROUND x4 #define INPUT_CTR x5 #define INPUT_POS x6 #define CTR x7 /* vector registers */ #define X0 v16 #define X1 v17 #define X2 v18 #define X3 v19 #define X4 v20 #define X5 v21 #define X6 v22 #define X7 v23 #define X8 v24 #define X9 v25 #define X10 v26 #define X11 v27 #define X12 v28 #define X13 v29 #define X14 v30 #define X15 v31 #define VCTR v0 #define VTMP0 v1 #define VTMP1 v2 #define VTMP2 v3 #define VTMP3 v4 #define X12_TMP v5 #define X13_TMP v6 /********************************************************************** helper macros **********************************************************************/ #define vpunpckldq(s1, s2, dst) \ zip1 dst.4s, s2.4s, s1.4s; #define vpunpckhdq(s1, s2, dst) \ zip2 dst.4s, s2.4s, s1.4s; #define vpunpcklqdq(s1, s2, dst) \ zip1 dst.2d, s2.2d, s1.2d; #define vpunpckhqdq(s1, s2, dst) \ zip2 dst.2d, s2.2d, s1.2d; /* 4x4 32-bit integer matrix transpose */ #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ vpunpckhdq(x1, x0, t2); \ vpunpckldq(x1, x0, x0); \ \ vpunpckldq(x3, x2, t1); \ vpunpckhdq(x3, x2, x2); \ \ vpunpckhqdq(t1, x0, x1); \ vpunpcklqdq(t1, x0, x0); \ \ vpunpckhqdq(x2, t2, x3); \ vpunpcklqdq(x2, t2, x2); #define clear(x) \ eor x.16b, x.16b, x.16b; /********************************************************************** 4-way chacha20 **********************************************************************/ #define ROTATE2(dst1,dst2,c,src1,src2) \ shl dst1.4s, src1.4s, #(c); \ shl dst2.4s, 
src2.4s, #(c); \ sri dst1.4s, src1.4s, #(32 - (c)); \ sri dst2.4s, src2.4s, #(32 - (c)); #define ROTATE2_16(dst1,dst2,src1,src2) \ rev32 dst1.8h, src1.8h; \ rev32 dst2.8h, src2.8h; #define XOR(d,s1,s2) \ eor d.16b, s2.16b, s1.16b; #define PLUS(ds,s) \ add ds.4s, ds.4s, s.4s; #define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2) \ PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \ ROTATE2_16(d1, d2, tmp1, tmp2); \ PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \ ROTATE2(b1, b2, 12, tmp1, tmp2); \ PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \ ROTATE2(d1, d2, 8, tmp1, tmp2); \ PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \ ROTATE2(b1, b2, 7, tmp1, tmp2); chacha20_data: .align 4 .Linc_counter: .long 0,1,2,3 .align 3 .globl _gcry_chacha20_aarch64_blocks4 -.type _gcry_chacha20_aarch64_blocks4,%function; +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) _gcry_chacha20_aarch64_blocks4: /* input: * x0: input * x1: dst * x2: src * x3: nblks (multiple of 4) */ GET_DATA_POINTER(CTR, .Linc_counter); add INPUT_CTR, INPUT, #(12*4); mov INPUT_POS, INPUT; ld1 {VCTR.16b}, [CTR]; .Loop4: /* Construct counter vectors X12 and X13 */ ld1 {X15.16b}, [INPUT_CTR]; mov ROUND, #20; ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; dup X12.4s, X15.s[0]; dup X13.4s, X15.s[1]; ldr CTR, [INPUT_CTR]; add X12.4s, X12.4s, VCTR.4s; dup X0.4s, VTMP1.s[0]; dup X1.4s, VTMP1.s[1]; dup X2.4s, VTMP1.s[2]; dup X3.4s, VTMP1.s[3]; dup X14.4s, X15.s[2]; cmhi VTMP0.4s, VCTR.4s, X12.4s; dup X15.4s, X15.s[3]; add CTR, CTR, #4; /* Update counter */ dup X4.4s, VTMP2.s[0]; dup X5.4s, VTMP2.s[1]; dup X6.4s, VTMP2.s[2]; dup X7.4s, VTMP2.s[3]; sub X13.4s, X13.4s, VTMP0.4s; dup X8.4s, VTMP3.s[0]; dup X9.4s, VTMP3.s[1]; dup X10.4s, VTMP3.s[2]; dup X11.4s, VTMP3.s[3]; mov X12_TMP.16b, X12.16b; mov X13_TMP.16b, X13.16b; str CTR, [INPUT_CTR]; .Lround2: subs ROUND, ROUND, #2 QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,VTMP0,VTMP1) QUARTERROUND2(X2, X6, X10, 
X14, X3, X7, X11, X15, tmp:=,VTMP0,VTMP1) QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,VTMP0,VTMP1) QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,VTMP0,VTMP1) b.ne .Lround2; ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32; PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ PLUS(X0, VTMP2); PLUS(X1, VTMP3); PLUS(X2, X12_TMP); PLUS(X3, X13_TMP); dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; mov INPUT_POS, INPUT; PLUS(X4, VTMP2); PLUS(X5, VTMP3); PLUS(X6, X12_TMP); PLUS(X7, X13_TMP); dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ PLUS(X8, VTMP2); PLUS(X9, VTMP3); PLUS(X10, X12_TMP); PLUS(X11, X13_TMP); PLUS(X14, VTMP0); PLUS(X15, VTMP1); transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2); transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2); transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2); transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2); subs NBLKS, NBLKS, #4; ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32; eor VTMP0.16b, X0.16b, VTMP0.16b; eor VTMP1.16b, X4.16b, VTMP1.16b; eor VTMP2.16b, X8.16b, VTMP2.16b; eor VTMP3.16b, X12.16b, VTMP3.16b; eor X12_TMP.16b, X1.16b, X12_TMP.16b; eor X13_TMP.16b, X5.16b, X13_TMP.16b; st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; ld1 
{X12_TMP.16b-X13_TMP.16b}, [SRC], #32; eor VTMP0.16b, X9.16b, VTMP0.16b; eor VTMP1.16b, X13.16b, VTMP1.16b; eor VTMP2.16b, X2.16b, VTMP2.16b; eor VTMP3.16b, X6.16b, VTMP3.16b; eor X12_TMP.16b, X10.16b, X12_TMP.16b; eor X13_TMP.16b, X14.16b, X13_TMP.16b; st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; eor VTMP0.16b, X3.16b, VTMP0.16b; eor VTMP1.16b, X7.16b, VTMP1.16b; eor VTMP2.16b, X11.16b, VTMP2.16b; eor VTMP3.16b, X15.16b, VTMP3.16b; st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; b.ne .Loop4; /* clear the used vector registers and stack */ clear(VTMP0); clear(VTMP1); clear(VTMP2); clear(VTMP3); clear(X12_TMP); clear(X13_TMP); clear(X0); clear(X1); clear(X2); clear(X3); clear(X4); clear(X5); clear(X6); clear(X7); clear(X8); clear(X9); clear(X10); clear(X11); clear(X12); clear(X13); clear(X14); clear(X15); eor x0, x0, x0 ret -.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4; +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) #endif diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index 0cfaf1cc..b6c4f59d 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -1,413 +1,419 @@ /* cipher-gcm-armv8-aarch64-ce.S - ARM/CE accelerated GHASH * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include <config.h> +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) .cpu generic+simd+crypto .text +#ifdef _WIN32 /* Windows: form the symbol address directly (adrp + :lo12:) instead of loading it through the GOT; same variant as chacha20-aarch64.S */ +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; +#endif /* Constants */ .align 4 gcry_gcm_reduction_constant: .Lrconst: .quad 0x87 /* Register macros */ #define rhash v0 #define rr0 v1 #define rr1 v2 #define rbuf v3 #define rbuf1 v4 #define rbuf2 v5 #define rbuf3 v6 #define rbuf4 v7 #define rbuf5 v8 #define rr2 v9 #define rr3 v10 #define rr4 v11 #define rr5 v12 #define rr6 v13 #define rr7 v14 #define rr8 v15 #define rr9 v16 #define rrconst v18 #define rh1 v19 #define rh2 v20 #define rh3 v21 #define rh4 v22 #define rh5 v23 #define rh6 v24 #define t0 v25 #define t1 v26 #define t2 v27 #define t3 v28 #define t4 v29 #define t5 v30 #define vZZ v31 /* GHASH macros */ /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology — CT-RSA 2015" for details. 
*/ /* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) */ #define PMUL_128x128(r0, r1, a, b, T0, T1, interleave_op) \ ext T0.16b, b.16b, b.16b, #8; \ pmull r0.1q, a.1d, b.1d; \ pmull2 r1.1q, a.2d, b.2d; \ pmull T1.1q, a.1d, T0.1d; \ pmull2 T0.1q, a.2d, T0.2d; \ interleave_op; \ eor T0.16b, T0.16b, T1.16b; \ ext T1.16b, vZZ.16b, T0.16b, #8; \ ext T0.16b, T0.16b, vZZ.16b, #8; \ eor r0.16b, r0.16b, T1.16b; \ eor r1.16b, r1.16b, T0.16b; /* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A) * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B) * Input: 'aC' and 'bC', Output: 'r0C:r1C' (low 128-bits in r0C, high in r1C) */ #define PMUL_128x128_3(r0A, r1A, aA, bA, t0A, t1A, \ r0B, r1B, aB, bB, t0B, t1B, \ r0C, r1C, aC, bC, t0C, t1C, interleave_op) \ ext t0A.16b, bA.16b, bA.16b, #8; \ pmull r0A.1q, aA.1d, bA.1d; \ pmull2 r1A.1q, aA.2d, bA.2d; \ ext t0B.16b, bB.16b, bB.16b, #8; \ pmull r0B.1q, aB.1d, bB.1d; \ pmull2 r1B.1q, aB.2d, bB.2d; \ ext t0C.16b, bC.16b, bC.16b, #8; \ pmull r0C.1q, aC.1d, bC.1d; \ pmull2 r1C.1q, aC.2d, bC.2d; \ pmull t1A.1q, aA.1d, t0A.1d; \ pmull2 t0A.1q, aA.2d, t0A.2d; \ pmull t1B.1q, aB.1d, t0B.1d; \ pmull2 t0B.1q, aB.2d, t0B.2d; \ pmull t1C.1q, aC.1d, t0C.1d; \ pmull2 t0C.1q, aC.2d, t0C.2d; \ eor t0A.16b, t0A.16b, t1A.16b; \ eor t0B.16b, t0B.16b, t1B.16b; \ eor t0C.16b, t0C.16b, t1C.16b; \ interleave_op; \ ext t1A.16b, vZZ.16b, t0A.16b, #8; \ ext t0A.16b, t0A.16b, vZZ.16b, #8; \ ext t1B.16b, vZZ.16b, t0B.16b, #8; \ ext t0B.16b, t0B.16b, vZZ.16b, #8; \ ext t1C.16b, vZZ.16b, t0C.16b, #8; \ ext t0C.16b, t0C.16b, vZZ.16b, #8; \ eor r0A.16b, r0A.16b, t1A.16b; \ eor r1A.16b, r1A.16b, t0A.16b; \ eor r0B.16b, r0B.16b, t1B.16b; \ eor r1B.16b, r1B.16b, t0B.16b; \ eor r0C.16b, r0C.16b, t1C.16b; \ eor r1C.16b, r1C.16b, t0C.16b; \ /* Input: 'r0:r1', Output: 'a' */ #define REDUCTION(a, r0, r1, rconst, T0, T1, interleave_op1, interleave_op2, \ interleave_op3) \ pmull2 T0.1q, r1.2d, rconst.2d; \ 
interleave_op1; \ ext T1.16b, T0.16b, vZZ.16b, #8; \ ext T0.16b, vZZ.16b, T0.16b, #8; \ interleave_op2; \ eor r1.16b, r1.16b, T1.16b; \ eor r0.16b, r0.16b, T0.16b; \ pmull T0.1q, r1.1d, rconst.1d; \ interleave_op3; \ eor a.16b, r0.16b, T0.16b; /* Other functional macros */ /* _(...) expands to its arguments unchanged: wrapping an instruction in _( ) lets its commas travel as a single argument of an enclosing macro. __ expands to nothing and is used as an empty interleave slot. */ #define _(...) __VA_ARGS__ #define __ _() /* Zero a vector register by XORing it with itself. */ #define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; /* Push d8-d15 onto the stack as four 16-byte pairs (pre-decrementing sp). */ #define VPUSH_ABI \ stp d8, d9, [sp, #-16]!; \ stp d10, d11, [sp, #-16]!; \ stp d12, d13, [sp, #-16]!; \ stp d14, d15, [sp, #-16]!; /* Pop d8-d15 in the reverse order of VPUSH_ABI. */ #define VPOP_ABI \ ldp d14, d15, [sp], #16; \ ldp d12, d13, [sp], #16; \ ldp d10, d11, [sp], #16; \ ldp d8, d9, [sp], #16; /* * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, * const byte *buf, size_t nblocks, * void *gcm_table); * * Folds nblocks 16-byte blocks from buf into the hash stored at result. * gcm_key supplies H; gcm_table (H² to H⁶) is read only when nblocks >= 6. * Always returns 0 in x0. */ .align 3 .globl _gcry_ghash_armv8_ce_pmull -.type _gcry_ghash_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) _gcry_ghash_armv8_ce_pmull: /* input: * x0: gcm_key * x1: result/hash * x2: buf * x3: nblocks * x4: gcm_table */ cbz x3, .Ldo_nothing; GET_DATA_POINTER(x5, .Lrconst) eor vZZ.16b, vZZ.16b, vZZ.16b /* vZZ = 0 for the ext shifts in the GHASH macros */ ld1 {rhash.16b}, [x1] ld1 {rh1.16b}, [x0] rbit rhash.16b, rhash.16b /* bit-swap */ ld1r {rrconst.2d}, [x5] /* replicate the 64-bit constant into both lanes */ cmp x3, #6 b.lo .Less_than_6 /* fewer than 6 blocks: single-block path */ add x6, x4, #64 /* x6 = gcm_table + 64, where H⁶ is stored */ VPUSH_ABI /* the 6-block path uses v8-v15 */ ld1 {rh2.16b-rh5.16b}, [x4] /* H², H³, H⁴, H⁵ */ ld1 {rh6.16b}, [x6] sub x3, x3, #6 ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) rbit rbuf.16b, rbuf.16b /* bit-swap */ rbit rbuf1.16b, rbuf1.16b /* bit-swap */ rbit rbuf2.16b, rbuf2.16b /* bit-swap */ rbit rbuf3.16b, rbuf3.16b /* bit-swap */ rbit rbuf4.16b, rbuf4.16b /* bit-swap */ rbit rbuf5.16b, rbuf5.16b /* bit-swap */ eor rhash.16b, rhash.16b, rbuf.16b cmp x3, #6 b.lo .Lend_6 .Loop_6: /* (in1) * H⁵ => rr0:rr1 */ /* (in2) * H⁴ => rr2:rr3 */ /* (in0 ^ hash) * H⁶ => rr4:rr5 */ PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, rr2, rr3, rbuf2, rh4, t2, t3, rr4, rr5, rhash, rh6, t4, t5, _(sub x3, x3, #6)) ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) cmp x3, #6 eor rr0.16b, rr0.16b,
rr2.16b eor rr1.16b, rr1.16b, rr3.16b /* (in3) * H³ => rr2:rr3 */ /* (in4) * H² => rr6:rr7 */ /* (in5) * H¹ => rr8:rr9 */ PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1, rr6, rr7, rbuf4, rh2, t2, t3, rr8, rr9, rbuf5, rh1, t4, t5, _(eor rr0.16b, rr0.16b, rr4.16b; eor rr1.16b, rr1.16b, rr5.16b)) eor rr0.16b, rr0.16b, rr2.16b eor rr1.16b, rr1.16b, rr3.16b rbit rbuf.16b, rbuf.16b eor rr0.16b, rr0.16b, rr6.16b eor rr1.16b, rr1.16b, rr7.16b rbit rbuf1.16b, rbuf1.16b eor rr0.16b, rr0.16b, rr8.16b eor rr1.16b, rr1.16b, rr9.16b ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, _(rbit rbuf2.16b, rbuf2.16b), _(rbit rbuf3.16b, rbuf3.16b), _(rbit rbuf4.16b, rbuf4.16b)) rbit rbuf5.16b, rbuf5.16b eor rhash.16b, rhash.16b, rbuf.16b b.hs .Loop_6 .Lend_6: /* (in1) * H⁵ => rr0:rr1 */ /* (in0 ^ hash) * H⁶ => rr2:rr3 */ /* (in2) * H⁴ => rr4:rr5 */ PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, rr2, rr3, rhash, rh6, t2, t3, rr4, rr5, rbuf2, rh4, t4, t5, __) eor rr0.16b, rr0.16b, rr2.16b eor rr1.16b, rr1.16b, rr3.16b eor rr0.16b, rr0.16b, rr4.16b eor rr1.16b, rr1.16b, rr5.16b /* (in3) * H³ => rhash:rbuf */ /* (in4) * H² => rr6:rr7 */ /* (in5) * H¹ => rr8:rr9 */ PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1, rr6, rr7, rbuf4, rh2, t2, t3, rr8, rr9, rbuf5, rh1, t4, t5, _(CLEAR_REG(rh4); CLEAR_REG(rh5); CLEAR_REG(rh6))) eor rr0.16b, rr0.16b, rhash.16b eor rr1.16b, rr1.16b, rbuf.16b eor rr0.16b, rr0.16b, rr6.16b eor rr1.16b, rr1.16b, rr7.16b eor rr0.16b, rr0.16b, rr8.16b eor rr1.16b, rr1.16b, rr9.16b REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, _(CLEAR_REG(rh2); CLEAR_REG(rh3); CLEAR_REG(rr2); CLEAR_REG(rbuf2); CLEAR_REG(rbuf3)), _(CLEAR_REG(rr3); CLEAR_REG(rr4); CLEAR_REG(rr5); CLEAR_REG(rr6); CLEAR_REG(rr7)), _(CLEAR_REG(rr8); CLEAR_REG(rr9); CLEAR_REG(rbuf1); CLEAR_REG(rbuf2))) CLEAR_REG(rbuf4) CLEAR_REG(rbuf5) CLEAR_REG(t2) CLEAR_REG(t3) CLEAR_REG(t4) CLEAR_REG(t5) VPOP_ABI cbz x3, .Ldone .Less_than_6: /* Handle remaining blocks. 
*/ ld1 {rbuf.16b}, [x2], #16 sub x3, x3, #1 rbit rbuf.16b, rbuf.16b /* bit-swap */ eor rhash.16b, rhash.16b, rbuf.16b cbz x3, .Lend .Loop: /* Multiply and reduce the running hash while the next block is loaded and bit-swapped in the interleave slots. */ PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16)) REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, _(sub x3, x3, #1), _(rbit rbuf.16b, rbuf.16b), __) eor rhash.16b, rhash.16b, rbuf.16b cbnz x3, .Loop .Lend: /* Final multiply; rbuf and rh1 are wiped in the interleave slots. */ PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf))) REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __) .Ldone: /* NOTE(review): the 6-block path can branch here directly (cbz x3, .Ldone) without passing .Lend, the only place where rh1 and rbuf are cleared; its epilogue clears rbuf2 twice but never rbuf. Confirm whether rh1 and rbuf should also be wiped here. */ CLEAR_REG(rr1) CLEAR_REG(rr0) rbit rhash.16b, rhash.16b /* bit-swap */ CLEAR_REG(t0) CLEAR_REG(t1) st1 {rhash.2d}, [x1] CLEAR_REG(rhash) .Ldo_nothing: mov x0, #0 ret -.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) /* * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); * * Bit-reverses H in place in gcm_key, then computes H², H³, H⁴, H⁵ and H⁶ * and stores them (16 bytes each, in that order) to gcm_table. */ .align 3 .globl _gcry_ghash_setup_armv8_ce_pmull -.type _gcry_ghash_setup_armv8_ce_pmull,%function; +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) _gcry_ghash_setup_armv8_ce_pmull: /* input: * x0: gcm_key * x1: gcm_table */ GET_DATA_POINTER(x2, .Lrconst) /* vZZ is the all-zero operand of the ext shifts inside PMUL_128x128 and REDUCTION; v31 holds an arbitrary value on entry, so clear it before the first multiply. */ eor vZZ.16b, vZZ.16b, vZZ.16b /* H¹ */ ld1 {rh1.16b}, [x0] rbit rh1.16b, rh1.16b st1 {rh1.16b}, [x0] ld1r {rrconst.2d}, [x2] /* H² */ PMUL_128x128(rr0, rr1, rh1, rh1, t0, t1, __) REDUCTION(rh2, rr0, rr1, rrconst, t0, t1, __, __, __) /* H³ */ PMUL_128x128(rr0, rr1, rh2, rh1, t0, t1, __) REDUCTION(rh3, rr0, rr1, rrconst, t0, t1, __, __, __) /* H⁴ */ PMUL_128x128(rr0, rr1, rh2, rh2, t0, t1, __) REDUCTION(rh4, rr0, rr1, rrconst, t0, t1, __, __, __) /* H⁵ */ PMUL_128x128(rr0, rr1, rh2, rh3, t0, t1, __) REDUCTION(rh5, rr0, rr1, rrconst, t0, t1, __, __, __) /* H⁶ */ PMUL_128x128(rr0, rr1, rh3, rh3, t0, t1, __) REDUCTION(rh6, rr0, rr1, rrconst, t0, t1, __, __, __) /* gcm_table layout: H², H³, H⁴, H⁵, H⁶ */ st1 {rh2.16b-rh4.16b}, [x1], #(3*16) st1 {rh5.16b-rh6.16b}, [x1] ret -.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; +ELF(.size
_gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) #endif diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S index e533bbe8..aad74875 100644 --- a/cipher/rijndael-aarch64.S +++ b/cipher/rijndael-aarch64.S @@ -1,510 +1,510 @@ /* rijndael-aarch64.S - ARMv8/AArch64 assembly implementation of AES cipher * * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS .text /* register macros */ #define CTX x0 #define RDST x1 #define RSRC x2 #define NROUNDS w3 #define RTAB x4 #define RMASK w5 #define RA w8 #define RB w9 #define RC w10 #define RD w11 #define RNA w12 #define RNB w13 #define RNC w14 #define RND w15 #define RT0 w6 #define RT1 w7 #define RT2 w16 #define xRT0 x6 #define xRT1 x7 #define xRT2 x16 #define xw8 x8 #define xw9 x9 #define xw10 x10 #define xw11 x11 #define xw12 x12 #define xw13 x13 #define xw14 x14 #define xw15 x15 /*********************************************************************** * ARMv8/AArch64 assembly implementation of the AES cipher ***********************************************************************/ #define preload_first_key(round, ra) \ ldr ra, [CTX, #(((round) * 16) + 0 * 4)]; #define dummy(round, ra) /* nothing */ #define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ ldp rna, rnb, [CTX]; \ ldp rnc, rnd, [CTX, #8]; \ eor ra, ra, rna; \ eor rb, rb, rnb; \ eor rc, rc, rnc; \ preload_key(1, rna); \ eor rd, rd, rnd; #define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ \ and RT0, RMASK, ra, lsl#2; \ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ and RT1, RMASK, ra, lsr#(8 - 2); \ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ and RT2, RMASK, ra, lsr#(16 - 2); \ ldr RT0, [RTAB, xRT0]; \ and ra, RMASK, ra, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rna, rna, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rd, lsl#2; \ ldr ra, [RTAB, x##ra]; \ \ eor rnd, rnd, RT1, ror #24; \ and RT1, RMASK, rd, lsr#(8 - 2); \ eor rnc, rnc, RT2, ror #16; \ and RT2, RMASK, rd, lsr#(16 - 2); \ eor rnb, rnb, ra, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rd, RMASK, rd, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnd, rnd, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rc, lsl#2; \ ldr rd, [RTAB, x##rd]; \ \ eor 
rnc, rnc, RT1, ror #24; \ and RT1, RMASK, rc, lsr#(8 - 2); \ eor rnb, rnb, RT2, ror #16; \ and RT2, RMASK, rc, lsr#(16 - 2); \ eor rna, rna, rd, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rc, RMASK, rc, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnc, rnc, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rb, lsl#2; \ ldr rc, [RTAB, x##rc]; \ \ eor rnb, rnb, RT1, ror #24; \ and RT1, RMASK, rb, lsr#(8 - 2); \ eor rna, rna, RT2, ror #16; \ and RT2, RMASK, rb, lsr#(16 - 2); \ eor rnd, rnd, rc, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rb, RMASK, rb, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnb, rnb, RT0; \ ldr RT2, [RTAB, xRT2]; \ eor rna, rna, RT1, ror #24; \ ldr rb, [RTAB, x##rb]; \ \ eor rnd, rnd, RT2, ror #16; \ preload_key((next_r) + 1, ra); \ eor rnc, rnc, rb, ror #8; #define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ and RT0, RMASK, ra, lsl#2; \ and RT1, RMASK, ra, lsr#(8 - 2); \ and RT2, RMASK, ra, lsr#(16 - 2); \ ldrb rna, [RTAB, xRT0]; \ and ra, RMASK, ra, lsr#(24 - 2); \ ldrb rnd, [RTAB, xRT1]; \ and RT0, RMASK, rd, lsl#2; \ ldrb rnc, [RTAB, xRT2]; \ ror rnd, rnd, #24; \ ldrb rnb, [RTAB, x##ra]; \ and RT1, RMASK, rd, lsr#(8 - 2); \ ror rnc, rnc, #16; \ and RT2, RMASK, rd, lsr#(16 - 2); \ ror rnb, rnb, #8; \ ldrb RT0, [RTAB, xRT0]; \ and rd, RMASK, rd, lsr#(24 - 2); \ ldrb RT1, [RTAB, xRT1]; \ \ orr rnd, rnd, RT0; \ ldrb RT2, [RTAB, xRT2]; \ and RT0, RMASK, rc, lsl#2; \ ldrb rd, [RTAB, x##rd]; \ orr rnc, rnc, RT1, ror #24; \ and RT1, RMASK, rc, lsr#(8 - 2); \ orr rnb, rnb, RT2, ror #16; \ and RT2, RMASK, rc, lsr#(16 - 2); \ orr rna, rna, rd, ror #8; \ ldrb RT0, [RTAB, xRT0]; \ and rc, RMASK, rc, lsr#(24 - 2); \ ldrb RT1, [RTAB, xRT1]; \ \ orr rnc, rnc, RT0; \ ldrb RT2, [RTAB, xRT2]; \ and RT0, RMASK, rb, lsl#2; \ ldrb rc, [RTAB, x##rc]; \ orr rnb, rnb, RT1, ror #24; \ and RT1, RMASK, rb, lsr#(8 - 2); \ orr rna, rna, RT2, ror #16; \ ldrb RT0, [RTAB, xRT0]; \ and RT2, RMASK, rb, lsr#(16 - 2); \ ldrb RT1, [RTAB, xRT1]; \ orr rnd, rnd, rc, ror #8; \ 
ldrb RT2, [RTAB, xRT2]; \ and rb, RMASK, rb, lsr#(24 - 2); \ ldrb rb, [RTAB, x##rb]; \ \ orr rnb, rnb, RT0; \ orr rna, rna, RT1, ror #24; \ orr rnd, rnd, RT2, ror #16; \ orr rnc, rnc, rb, ror #8; #define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); #define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); #define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ add CTX, CTX, #(((round) + 1) * 16); \ add RTAB, RTAB, #1; \ do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_encrypt_block -.type _gcry_aes_arm_encrypt_block,%function; +ELF(.type _gcry_aes_arm_encrypt_block,%function;) _gcry_aes_arm_encrypt_block: /* input: * %x0: keysched, CTX * %x1: dst * %x2: src * %w3: number of rounds.. 
10, 12 or 14 * %x4: encryption table */ /* read input block */ /* aligned load */ ldp RA, RB, [RSRC]; ldp RC, RD, [RSRC, #8]; #ifndef __AARCH64EL__ rev RA, RA; rev RB, RB; rev RC, RC; rev RD, RD; #endif mov RMASK, #(0xff<<2); firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND); encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); cmp NROUNDS, #12; bge .Lenc_not_128; encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD); .Lenc_done: /* store output block */ /* aligned store */ #ifndef __AARCH64EL__ rev RA, RA; rev RB, RB; rev RC, RC; rev RD, RD; #endif /* write output block */ stp RA, RB, [RDST]; stp RC, RD, [RDST, #8]; mov x0, #(0); ret; .ltorg .Lenc_not_128: beq .Lenc_192 encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; .ltorg .Lenc_192: encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; +ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;) #define 
addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \ eor ra, ra, rna; \ ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \ eor rb, rb, rnb; \ ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \ eor rc, rc, rnc; \ preload_first_key((round) - 1, rna); \ eor rd, rd, rnd; #define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ \ and RT0, RMASK, ra, lsl#2; \ ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ and RT1, RMASK, ra, lsr#(8 - 2); \ ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ and RT2, RMASK, ra, lsr#(16 - 2); \ ldr RT0, [RTAB, xRT0]; \ and ra, RMASK, ra, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rna, rna, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rb, lsl#2; \ ldr ra, [RTAB, x##ra]; \ \ eor rnb, rnb, RT1, ror #24; \ and RT1, RMASK, rb, lsr#(8 - 2); \ eor rnc, rnc, RT2, ror #16; \ and RT2, RMASK, rb, lsr#(16 - 2); \ eor rnd, rnd, ra, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rb, RMASK, rb, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnb, rnb, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rc, lsl#2; \ ldr rb, [RTAB, x##rb]; \ \ eor rnc, rnc, RT1, ror #24; \ and RT1, RMASK, rc, lsr#(8 - 2); \ eor rnd, rnd, RT2, ror #16; \ and RT2, RMASK, rc, lsr#(16 - 2); \ eor rna, rna, rb, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rc, RMASK, rc, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnc, rnc, RT0; \ ldr RT2, [RTAB, xRT2]; \ and RT0, RMASK, rd, lsl#2; \ ldr rc, [RTAB, x##rc]; \ \ eor rnd, rnd, RT1, ror #24; \ and RT1, RMASK, rd, lsr#(8 - 2); \ eor rna, rna, RT2, ror #16; \ and RT2, RMASK, rd, lsr#(16 - 2); \ eor rnb, rnb, rc, ror #8; \ ldr RT0, [RTAB, xRT0]; \ and rd, RMASK, rd, lsr#(24 - 2); \ \ ldr RT1, [RTAB, xRT1]; \ eor rnd, rnd, RT0; \ ldr RT2, [RTAB, xRT2]; \ eor rna, rna, RT1, ror #24; \ ldr rd, [RTAB, x##rd]; \ \ eor rnb, rnb, RT2, ror #16; \ preload_key((next_r) - 1, ra); \ eor rnc, rnc, rd, ror #8; 
#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ and RT0, RMASK, ra; \ and RT1, RMASK, ra, lsr#8; \ and RT2, RMASK, ra, lsr#16; \ ldrb rna, [RTAB, xRT0]; \ lsr ra, ra, #24; \ ldrb rnb, [RTAB, xRT1]; \ and RT0, RMASK, rb; \ ldrb rnc, [RTAB, xRT2]; \ ror rnb, rnb, #24; \ ldrb rnd, [RTAB, x##ra]; \ and RT1, RMASK, rb, lsr#8; \ ror rnc, rnc, #16; \ and RT2, RMASK, rb, lsr#16; \ ror rnd, rnd, #8; \ ldrb RT0, [RTAB, xRT0]; \ lsr rb, rb, #24; \ ldrb RT1, [RTAB, xRT1]; \ \ orr rnb, rnb, RT0; \ ldrb RT2, [RTAB, xRT2]; \ and RT0, RMASK, rc; \ ldrb rb, [RTAB, x##rb]; \ orr rnc, rnc, RT1, ror #24; \ and RT1, RMASK, rc, lsr#8; \ orr rnd, rnd, RT2, ror #16; \ and RT2, RMASK, rc, lsr#16; \ orr rna, rna, rb, ror #8; \ ldrb RT0, [RTAB, xRT0]; \ lsr rc, rc, #24; \ ldrb RT1, [RTAB, xRT1]; \ \ orr rnc, rnc, RT0; \ ldrb RT2, [RTAB, xRT2]; \ and RT0, RMASK, rd; \ ldrb rc, [RTAB, x##rc]; \ orr rnd, rnd, RT1, ror #24; \ and RT1, RMASK, rd, lsr#8; \ orr rna, rna, RT2, ror #16; \ ldrb RT0, [RTAB, xRT0]; \ and RT2, RMASK, rd, lsr#16; \ ldrb RT1, [RTAB, xRT1]; \ orr rnb, rnb, rc, ror #8; \ ldrb RT2, [RTAB, xRT2]; \ lsr rd, rd, #24; \ ldrb rd, [RTAB, x##rd]; \ \ orr rnd, rnd, RT0; \ orr rna, rna, RT1, ror #24; \ orr rnb, rnb, RT2, ror #16; \ orr rnc, rnc, rd, ror #8; #define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); #define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); #define set_last_round_rmask(_, __) \ mov RMASK, #0xff; #define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ add RTAB, RTAB, #(4 * 256); \ do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .globl _gcry_aes_arm_decrypt_block -.type _gcry_aes_arm_decrypt_block,%function; +ELF(.type 
_gcry_aes_arm_decrypt_block,%function;) _gcry_aes_arm_decrypt_block: /* input: * %x0: keysched, CTX * %x1: dst * %x2: src * %w3: number of rounds.. 10, 12 or 14 * %x4: decryption table */ /* read input block */ /* aligned load */ ldp RA, RB, [RSRC]; ldp RC, RD, [RSRC, #8]; #ifndef __AARCH64EL__ rev RA, RA; rev RB, RB; rev RC, RC; rev RD, RD; #endif mov RMASK, #(0xff << 2); cmp NROUNDS, #12; bge .Ldec_256; firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND); .Ldec_tail: decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask); lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD); /* store output block */ /* aligned store */ #ifndef __AARCH64EL__ rev RA, RA; rev RB, RB; rev RC, RC; rev RD, RD; #endif /* write output block */ stp RA, RB, [RDST]; stp RC, RD, [RDST, #8]; mov x0, #(0); ret; .ltorg .Ldec_256: beq .Ldec_192; firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND); decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; .ltorg .Ldec_192: firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND); decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block; +ELF(.size 
_gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__ */ diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S index 40097a71..5859557a 100644 --- a/cipher/rijndael-armv8-aarch64-ce.S +++ b/cipher/rijndael-armv8-aarch64-ce.S @@ -1,1588 +1,1588 @@ /* rijndael-armv8-aarch64-ce.S - ARMv8/CE accelerated AES * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) .cpu generic+simd+crypto .text #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; /* Register macros */ #define vk0 v17 #define vk1 v18 #define vk2 v19 #define vk3 v20 #define vk4 v21 #define vk5 v22 #define vk6 v23 #define vk7 v24 #define vk8 v25 #define vk9 v26 #define vk10 v27 #define vk11 v28 #define vk12 v29 #define vk13 v30 #define vk14 v31 /* AES macros */ #define aes_preload_keys(keysched, nrounds) \ cmp nrounds, #12; \ ld1 {vk0.16b-vk3.16b}, [keysched], #64; \ ld1 {vk4.16b-vk7.16b}, [keysched], #64; \ ld1 {vk8.16b-vk10.16b}, [keysched], #48; \ b.lo 1f; \ ld1 {vk11.16b-vk12.16b}, [keysched], #32; \ b.eq 1f; \ ld1 {vk13.16b-vk14.16b}, [keysched]; \ 1: ; #define do_aes_one128(ed, mcimc, vo, vb) \ aes##ed vb.16b, vk0.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk1.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk2.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk3.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk4.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk5.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk6.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk7.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk8.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk9.16b; \ eor vo.16b, vb.16b, vk10.16b; #define do_aes_one192(ed, mcimc, vo, vb) \ aes##ed vb.16b, vk0.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk1.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk2.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk3.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk4.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk5.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk6.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk7.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed 
vb.16b, vk8.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk9.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk10.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk11.16b; \ eor vo.16b, vb.16b, vk12.16b; #define do_aes_one256(ed, mcimc, vo, vb) \ aes##ed vb.16b, vk0.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk1.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk2.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk3.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk4.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk5.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk6.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk7.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk8.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk9.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk10.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk11.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk12.16b; \ aes##mcimc vb.16b, vb.16b; \ aes##ed vb.16b, vk13.16b; \ eor vo.16b, vb.16b, vk14.16b; #define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \ aes##ed b0.16b, key.16b; \ aes##mcimc b0.16b, b0.16b; \ aes##ed b1.16b, key.16b; \ aes##mcimc b1.16b, b1.16b; \ aes##ed b2.16b, key.16b; \ aes##mcimc b2.16b, b2.16b; \ aes##ed b3.16b, key.16b; \ aes##mcimc b3.16b, b3.16b; #define aes_lastround_4(ed, b0, b1, b2, b3, key1, key2) \ aes##ed b0.16b, key1.16b; \ eor b0.16b, b0.16b, key2.16b; \ aes##ed b1.16b, key1.16b; \ eor b1.16b, b1.16b, key2.16b; \ aes##ed b2.16b, key1.16b; \ eor b2.16b, b2.16b, key2.16b; \ aes##ed b3.16b, key1.16b; \ eor b3.16b, b3.16b, key2.16b; #define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); 
\ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ aes_lastround_4(ed, b0, b1, b2, b3, vk9, vk10); #define do_aes_4_192(ed, mcimc, b0, b1, b2, b3) \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \ aes_lastround_4(ed, b0, b1, b2, b3, vk11, vk12); #define do_aes_4_256(ed, mcimc, b0, b1, b2, b3) \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk0); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk11); \ aes_round_4(ed, mcimc, b0, b1, b2, b3, vk12); \ aes_lastround_4(ed, b0, b1, b2, b3, vk13, vk14); /* Other functional macros */ #define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; #define aes_clear_keys(nrounds) \ cmp nrounds, #12; \ CLEAR_REG(vk0); \ CLEAR_REG(vk1); \ CLEAR_REG(vk2); \ CLEAR_REG(vk3); \ CLEAR_REG(vk4); \ CLEAR_REG(vk5); \ CLEAR_REG(vk6); \ CLEAR_REG(vk7); \ CLEAR_REG(vk9); \ CLEAR_REG(vk8); \ CLEAR_REG(vk10); \ b.lo 1f; \ CLEAR_REG(vk11); \ CLEAR_REG(vk12); \ b.eq 1f; \ CLEAR_REG(vk13); \ CLEAR_REG(vk14); \ 1: ; /* * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst, * const byte 
*src, * unsigned int nrounds); * * Encrypts the single 16-byte block at src into dst; always returns 0. */ .align 3 .globl _gcry_aes_enc_armv8_ce -.type _gcry_aes_enc_armv8_ce,%function; +ELF(.type _gcry_aes_enc_armv8_ce,%function;) _gcry_aes_enc_armv8_ce: /* input: * x0: keysched * x1: dst * x2: src * w3: nrounds */ aes_preload_keys(x0, w3); /* The flags still hold 'cmp nrounds, #12' from aes_preload_keys: hi selects the 256-bit path, eq the 192-bit path, otherwise fall through to 128-bit. */ ld1 {v0.16b}, [x2] b.hi .Lenc1_256 b.eq .Lenc1_192 .Lenc1_128: do_aes_one128(e, mc, v0, v0); .Lenc1_tail: /* Common exit: wipe round keys vk0..vk10, store the block, wipe v0. */ CLEAR_REG(vk0) CLEAR_REG(vk1) CLEAR_REG(vk2) CLEAR_REG(vk3) CLEAR_REG(vk4) CLEAR_REG(vk5) CLEAR_REG(vk6) CLEAR_REG(vk7) CLEAR_REG(vk8) CLEAR_REG(vk9) CLEAR_REG(vk10) st1 {v0.16b}, [x1] CLEAR_REG(v0) mov x0, #0 ret .Lenc1_192: do_aes_one192(e, mc, v0, v0); CLEAR_REG(vk11) CLEAR_REG(vk12) b .Lenc1_tail .Lenc1_256: do_aes_one256(e, mc, v0, v0); CLEAR_REG(vk11) CLEAR_REG(vk12) CLEAR_REG(vk13) CLEAR_REG(vk14) b .Lenc1_tail -.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce; +ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;) /* * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst, * const byte *src, * unsigned int nrounds); * * Decrypts the single 16-byte block at src into dst; always returns 0. */ .align 3 .globl _gcry_aes_dec_armv8_ce -.type _gcry_aes_dec_armv8_ce,%function; +ELF(.type _gcry_aes_dec_armv8_ce,%function;) _gcry_aes_dec_armv8_ce: /* input: * x0: keysched * x1: dst * x2: src * w3: nrounds */ aes_preload_keys(x0, w3); /* Same flag-based dispatch on nrounds as in the encrypt routine. */ ld1 {v0.16b}, [x2] b.hi .Ldec1_256 b.eq .Ldec1_192 .Ldec1_128: do_aes_one128(d, imc, v0, v0); .Ldec1_tail: /* Common exit: wipe round keys vk0..vk10, store the block, wipe v0. */ CLEAR_REG(vk0) CLEAR_REG(vk1) CLEAR_REG(vk2) CLEAR_REG(vk3) CLEAR_REG(vk4) CLEAR_REG(vk5) CLEAR_REG(vk6) CLEAR_REG(vk7) CLEAR_REG(vk8) CLEAR_REG(vk9) CLEAR_REG(vk10) st1 {v0.16b}, [x1] CLEAR_REG(v0) mov x0, #0 ret .Ldec1_192: do_aes_one192(d, imc, v0, v0); CLEAR_REG(vk11) CLEAR_REG(vk12) b .Ldec1_tail .Ldec1_256: do_aes_one256(d, imc, v0, v0); CLEAR_REG(vk11) CLEAR_REG(vk12) CLEAR_REG(vk13) CLEAR_REG(vk14) b .Ldec1_tail -.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce; +ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;) /* * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched, * unsigned char
*outbuf, * const unsigned char *inbuf, * unsigned char *iv, size_t nblocks, * int cbc_mac, unsigned int nrounds); * * CBC-encrypts nblocks blocks and writes the last chaining value back to iv. * When cbc_mac is non-zero outbuf is not advanced, so every ciphertext block * is stored at the same address and only the last one remains. */ .align 3 .globl _gcry_aes_cbc_enc_armv8_ce -.type _gcry_aes_cbc_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_enc_armv8_ce,%function;) _gcry_aes_cbc_enc_armv8_ce: /* input: * x0: keysched * x1: outbuf * x2: inbuf * x3: iv * x4: nblocks * w5: cbc_mac * w6: nrounds */ cbz x4, .Lcbc_enc_skip /* x5 becomes the output stride: 1 if cbc_mac == 0 else 0, then shifted to 16 or 0 bytes. */ cmp w5, #0 ld1 {v1.16b}, [x3] /* load IV */ cset x5, eq aes_preload_keys(x0, w6); /* lsl does not touch the flags, so the branches below still test 'cmp nrounds, #12' from aes_preload_keys. */ lsl x5, x5, #4 b.eq .Lcbc_enc_loop192 b.hi .Lcbc_enc_loop256 #define CBC_ENC(bits) \ .Lcbc_enc_loop##bits: \ ld1 {v0.16b}, [x2], #16; /* load plaintext */ \ eor v1.16b, v0.16b, v1.16b; \ sub x4, x4, #1; \ \ do_aes_one##bits(e, mc, v1, v1); \ \ st1 {v1.16b}, [x1], x5; /* store ciphertext */ \ \ cbnz x4, .Lcbc_enc_loop##bits; \ b .Lcbc_enc_done; CBC_ENC(128) CBC_ENC(192) CBC_ENC(256) #undef CBC_ENC .Lcbc_enc_done: aes_clear_keys(w6) st1 {v1.16b}, [x3] /* store IV */ CLEAR_REG(v1) CLEAR_REG(v0) .Lcbc_enc_skip: ret -.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce; +ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;) /* * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *iv, size_t nblocks, * unsigned int nrounds); */ .align 3 .globl _gcry_aes_cbc_dec_armv8_ce -.type _gcry_aes_cbc_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cbc_dec_armv8_ce,%function;) _gcry_aes_cbc_dec_armv8_ce: /* input: * x0: keysched * x1: outbuf * x2: inbuf * x3: iv * x4: nblocks * w5: nrounds */ cbz x4, .Lcbc_dec_skip ld1 {v0.16b}, [x3] /* load IV */ aes_preload_keys(x0, w5); /* Dispatch on the flags left by 'cmp nrounds, #12' in aes_preload_keys; the 128-bit variant is reached by fall-through. */ b.eq .Lcbc_dec_entry_192 b.hi .Lcbc_dec_entry_256 #define CBC_DEC(bits) \ .Lcbc_dec_entry_##bits: \ cmp x4, #4; \ b.lo .Lcbc_dec_loop_##bits; \ \ .Lcbc_dec_loop4_##bits: \ \ ld1 {v1.16b-v4.16b}, [x2], #64; /* load ciphertext */ \ sub x4, x4, #4; \ mov v5.16b, v1.16b; \ mov v6.16b, v2.16b; \ mov v7.16b, v3.16b; \ mov v16.16b, v4.16b; \ cmp x4, #4; \
do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v0.16b; \ eor v2.16b, v2.16b, v5.16b; \ st1 {v1.16b-v2.16b}, [x1], #32; /* store plaintext */ \ eor v3.16b, v3.16b, v6.16b; \ eor v4.16b, v4.16b, v7.16b; \ mov v0.16b, v16.16b; /* next IV */ \ st1 {v3.16b-v4.16b}, [x1], #32; /* store plaintext */ \ \ b.hs .Lcbc_dec_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ CLEAR_REG(v16); \ cbz x4, .Lcbc_dec_done; \ \ .Lcbc_dec_loop_##bits: \ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ sub x4, x4, #1; \ mov v2.16b, v1.16b; \ \ do_aes_one##bits(d, imc, v1, v1); \ \ eor v1.16b, v1.16b, v0.16b; \ mov v0.16b, v2.16b; \ st1 {v1.16b}, [x1], #16; /* store plaintext */ \ \ cbnz x4, .Lcbc_dec_loop_##bits; \ b .Lcbc_dec_done; CBC_DEC(128) CBC_DEC(192) CBC_DEC(256) #undef CBC_DEC .Lcbc_dec_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store IV */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) .Lcbc_dec_skip: ret -.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce; +ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;) /* * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *iv, unsigned int nrounds); */ .align 3 .globl _gcry_aes_ctr_enc_armv8_ce -.type _gcry_aes_ctr_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ctr_enc_armv8_ce,%function;) _gcry_aes_ctr_enc_armv8_ce: /* input: * r0: keysched * r1: outbuf * r2: inbuf * r3: iv * x4: nblocks * w5: nrounds */ cbz x4, .Lctr_enc_skip mov x6, #1 movi v16.16b, #0 mov v16.D[1], x6 /* load IV */ ldp x9, x10, [x3] ld1 {v0.16b}, [x3] rev x9, x9 rev x10, x10 aes_preload_keys(x0, w5); b.eq .Lctr_enc_entry_192 b.hi .Lctr_enc_entry_256 #define CTR_ENC(bits) \ .Lctr_enc_entry_##bits: \ cmp x4, #4; \ b.lo .Lctr_enc_loop_##bits; \ \ .Lctr_enc_loop4_##bits: \ cmp x10, #0xfffffffffffffffc; \ sub x4, x4, #4; \ b.lo .Lctr_enc_loop4_##bits##_nocarry; \ \ adds x10, x10, #1; \ mov v1.16b, v0.16b; \ 
adc x9, x9, xzr; \ mov v2.D[1], x10; \ mov v2.D[0], x9; \ \ adds x10, x10, #1; \ rev64 v2.16b, v2.16b; \ adc x9, x9, xzr; \ mov v3.D[1], x10; \ mov v3.D[0], x9; \ \ adds x10, x10, #1; \ rev64 v3.16b, v3.16b; \ adc x9, x9, xzr; \ mov v4.D[1], x10; \ mov v4.D[0], x9; \ \ adds x10, x10, #1; \ rev64 v4.16b, v4.16b; \ adc x9, x9, xzr; \ mov v0.D[1], x10; \ mov v0.D[0], x9; \ rev64 v0.16b, v0.16b; \ \ b .Lctr_enc_loop4_##bits##_store_ctr; \ \ .Lctr_enc_loop4_##bits##_nocarry: \ \ add v3.2d, v16.2d, v16.2d; /* 2 */ \ rev64 v6.16b, v0.16b; \ add x10, x10, #4; \ add v4.2d, v3.2d, v16.2d; /* 3 */ \ add v0.2d, v3.2d, v3.2d; /* 4 */ \ rev64 v1.16b, v6.16b; \ add v2.2d, v6.2d, v16.2d; \ add v3.2d, v6.2d, v3.2d; \ add v4.2d, v6.2d, v4.2d; \ add v0.2d, v6.2d, v0.2d; \ rev64 v2.16b, v2.16b; \ rev64 v3.16b, v3.16b; \ rev64 v0.16b, v0.16b; \ rev64 v4.16b, v4.16b; \ \ .Lctr_enc_loop4_##bits##_store_ctr: \ \ st1 {v0.16b}, [x3]; \ cmp x4, #4; \ ld1 {v5.16b-v7.16b}, [x2], #48; /* preload ciphertext */ \ \ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v5.16b; \ ld1 {v5.16b}, [x2], #16; /* load ciphertext */ \ eor v2.16b, v2.16b, v6.16b; \ eor v3.16b, v3.16b, v7.16b; \ eor v4.16b, v4.16b, v5.16b; \ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ \ b.hs .Lctr_enc_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x4, .Lctr_enc_done; \ \ .Lctr_enc_loop_##bits: \ \ adds x10, x10, #1; \ mov v1.16b, v0.16b; \ adc x9, x9, xzr; \ mov v0.D[1], x10; \ mov v0.D[0], x9; \ sub x4, x4, #1; \ ld1 {v2.16b}, [x2], #16; /* load ciphertext */ \ rev64 v0.16b, v0.16b; \ \ do_aes_one##bits(e, mc, v1, v1); \ \ eor v1.16b, v2.16b, v1.16b; \ st1 {v1.16b}, [x1], #16; /* store plaintext */ \ \ cbnz x4, .Lctr_enc_loop_##bits; \ b .Lctr_enc_done; CTR_ENC(128) CTR_ENC(192) CTR_ENC(256) #undef CTR_ENC .Lctr_enc_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store IV */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) .Lctr_enc_skip: ret -.size 
_gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce; +ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;) /* * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *iv, unsigned int nrounds); */ .align 3 .globl _gcry_aes_cfb_enc_armv8_ce -.type _gcry_aes_cfb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_enc_armv8_ce,%function;) _gcry_aes_cfb_enc_armv8_ce: /* input: * r0: keysched * r1: outbuf * r2: inbuf * r3: iv * x4: nblocks * w5: nrounds */ cbz x4, .Lcfb_enc_skip /* load IV */ ld1 {v0.16b}, [x3] aes_preload_keys(x0, w5); b.eq .Lcfb_enc_entry_192 b.hi .Lcfb_enc_entry_256 #define CFB_ENC(bits) \ .Lcfb_enc_entry_##bits: \ .Lcfb_enc_loop_##bits: \ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ sub x4, x4, #1; \ \ do_aes_one##bits(e, mc, v0, v0); \ \ eor v0.16b, v1.16b, v0.16b; \ st1 {v0.16b}, [x1], #16; /* store ciphertext */ \ \ cbnz x4, .Lcfb_enc_loop_##bits; \ b .Lcfb_enc_done; CFB_ENC(128) CFB_ENC(192) CFB_ENC(256) #undef CFB_ENC .Lcfb_enc_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store IV */ CLEAR_REG(v0) CLEAR_REG(v1) .Lcfb_enc_skip: ret -.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce; +ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;) /* * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *iv, unsigned int nrounds); */ .align 3 .globl _gcry_aes_cfb_dec_armv8_ce -.type _gcry_aes_cfb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_cfb_dec_armv8_ce,%function;) _gcry_aes_cfb_dec_armv8_ce: /* input: * r0: keysched * r1: outbuf * r2: inbuf * r3: iv * x4: nblocks * w5: nrounds */ cbz x4, .Lcfb_dec_skip /* load IV */ ld1 {v0.16b}, [x3] aes_preload_keys(x0, w5); b.eq .Lcfb_dec_entry_192 b.hi .Lcfb_dec_entry_256 #define CFB_DEC(bits) \ .Lcfb_dec_entry_##bits: \ cmp x4, #4; \ b.lo .Lcfb_dec_loop_##bits; \ \ .Lcfb_dec_loop4_##bits: \ \ ld1 {v2.16b-v4.16b}, [x2], #48; 
/* load ciphertext */ \ mov v1.16b, v0.16b; \ sub x4, x4, #4; \ cmp x4, #4; \ mov v5.16b, v2.16b; \ mov v6.16b, v3.16b; \ mov v7.16b, v4.16b; \ ld1 {v0.16b}, [x2], #16; /* load next IV / ciphertext */ \ \ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v5.16b; \ eor v2.16b, v2.16b, v6.16b; \ eor v3.16b, v3.16b, v7.16b; \ eor v4.16b, v4.16b, v0.16b; \ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ \ b.hs .Lcfb_dec_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x4, .Lcfb_dec_done; \ \ .Lcfb_dec_loop_##bits: \ \ ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ \ sub x4, x4, #1; \ \ do_aes_one##bits(e, mc, v0, v0); \ \ eor v2.16b, v1.16b, v0.16b; \ mov v0.16b, v1.16b; \ st1 {v2.16b}, [x1], #16; /* store plaintext */ \ \ cbnz x4, .Lcfb_dec_loop_##bits; \ b .Lcfb_dec_done; CFB_DEC(128) CFB_DEC(192) CFB_DEC(256) #undef CFB_DEC .Lcfb_dec_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store IV */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) .Lcfb_dec_skip: ret -.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce; +ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;) /* * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *offset, * unsigned char *checksum, * unsigned char *L_table, * size_t nblocks, * unsigned int nrounds, * unsigned int blkn); */ .align 3 .globl _gcry_aes_ocb_enc_armv8_ce -.type _gcry_aes_ocb_enc_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_enc_armv8_ce,%function;) _gcry_aes_ocb_enc_armv8_ce: /* input: * x0: keysched * x1: outbuf * x2: inbuf * x3: offset * x4: checksum * x5: Ltable * x6: nblocks (0 < nblocks <= 32) * w7: nrounds * %st+0: blkn => w12 */ ldr w12, [sp] ld1 {v0.16b}, [x3] /* load offset */ ld1 {v16.16b}, [x4] /* load checksum */ aes_preload_keys(x0, w7); b.eq .Locb_enc_entry_192 b.hi .Locb_enc_entry_256 #define OCB_ENC(bits, ...) 
\ .Locb_enc_entry_##bits: \ cmp x6, #4; \ add x12, x12, #1; \ b.lo .Locb_enc_loop_##bits; \ \ .Locb_enc_loop4_##bits: \ \ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* Checksum_i = Checksum_{i-1} xor P_i */ \ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ \ add w9, w12, #1; \ add w10, w12, #2; \ add w11, w12, #3; \ rbit w8, w12; \ add w12, w12, #4; \ rbit w9, w9; \ rbit w10, w10; \ rbit w11, w11; \ clz w8, w8; /* ntz(i+0) */ \ clz w9, w9; /* ntz(i+1) */ \ clz w10, w10; /* ntz(i+2) */ \ clz w11, w11; /* ntz(i+3) */ \ add x8, x5, x8, lsl #4; \ ld1 {v1.16b-v4.16b}, [x2], #64; /* load P_i+<0-3> */ \ add x9, x5, x9, lsl #4; \ add x10, x5, x10, lsl #4; \ add x11, x5, x11, lsl #4; \ \ sub x6, x6, #4; \ \ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ eor v1.16b, v1.16b, v5.16b; /* P_i+0 xor Offset_i+0 */ \ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ eor v2.16b, v2.16b, v6.16b; /* P_i+1 xor Offset_i+1 */ \ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ cmp x6, #4; \ eor v3.16b, v3.16b, v7.16b; /* P_i+2 xor Offset_i+2 */ \ eor v4.16b, v4.16b, v0.16b; /* P_i+3 xor Offset_i+3 */ \ \ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \ eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \ eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \ eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \ st1 {v1.16b-v4.16b}, [x1], #64; \ \ b.hs .Locb_enc_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x6, .Locb_enc_done; \ \ .Locb_enc_loop_##bits: \ \ 
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* Checksum_i = Checksum_{i-1} xor P_i */ \ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ \ \ rbit x8, x12; \ add x12, x12, #1; \ clz x8, x8; /* ntz(i) */ \ add x8, x5, x8, lsl #4; \ \ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ sub x6, x6, #1; \ eor v0.16b, v0.16b, v2.16b; \ eor v16.16b, v16.16b, v1.16b; \ eor v1.16b, v1.16b, v0.16b; \ \ do_aes_one##bits(e, mc, v1, v1); \ \ eor v1.16b, v1.16b, v0.16b; \ st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ \ cbnz x6, .Locb_enc_loop_##bits; \ b .Locb_enc_done; OCB_ENC(128) OCB_ENC(192) OCB_ENC(256) #undef OCB_ENC .Locb_enc_done: aes_clear_keys(w7) st1 {v16.16b}, [x4] /* store checksum */ st1 {v0.16b}, [x3] /* store offset */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) CLEAR_REG(v16) ret -.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce; +ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;) /* * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *offset, * unsigned char *checksum, * unsigned char *L_table, * size_t nblocks, * unsigned int nrounds, * unsigned int blkn); */ .align 3 .globl _gcry_aes_ocb_dec_armv8_ce -.type _gcry_aes_ocb_dec_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_dec_armv8_ce,%function;) _gcry_aes_ocb_dec_armv8_ce: /* input: * x0: keysched * x1: outbuf * x2: inbuf * x3: offset * x4: checksum * x5: Ltable * x6: nblocks (0 < nblocks <= 32) * w7: nrounds * %st+0: blkn => w12 */ ldr w12, [sp] ld1 {v0.16b}, [x3] /* load offset */ ld1 {v16.16b}, [x4] /* load checksum */ aes_preload_keys(x0, w7); b.eq .Locb_dec_entry_192 b.hi .Locb_dec_entry_256 #define OCB_DEC(bits) \ .Locb_dec_entry_##bits: \ cmp x6, #4; \ add w12, w12, #1; \ b.lo .Locb_dec_loop_##bits; \ \ .Locb_dec_loop4_##bits: \ \ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ /* Checksum_i = 
Checksum_{i-1} xor P_i */ \ \ add w9, w12, #1; \ add w10, w12, #2; \ add w11, w12, #3; \ rbit w8, w12; \ add w12, w12, #4; \ rbit w9, w9; \ rbit w10, w10; \ rbit w11, w11; \ clz w8, w8; /* ntz(i+0) */ \ clz w9, w9; /* ntz(i+1) */ \ clz w10, w10; /* ntz(i+2) */ \ clz w11, w11; /* ntz(i+3) */ \ add x8, x5, x8, lsl #4; \ ld1 {v1.16b-v4.16b}, [x2], #64; /* load C_i+<0-3> */ \ add x9, x5, x9, lsl #4; \ add x10, x5, x10, lsl #4; \ add x11, x5, x11, lsl #4; \ \ sub x6, x6, #4; \ \ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ eor v1.16b, v1.16b, v5.16b; /* C_i+0 xor Offset_i+0 */ \ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ eor v2.16b, v2.16b, v6.16b; /* C_i+1 xor Offset_i+1 */ \ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ cmp x6, #4; \ eor v3.16b, v3.16b, v7.16b; /* C_i+2 xor Offset_i+2 */ \ eor v4.16b, v4.16b, v0.16b; /* C_i+3 xor Offset_i+3 */ \ \ do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v5.16b; /* xor Offset_i+0 */ \ eor v2.16b, v2.16b, v6.16b; /* xor Offset_i+1 */ \ eor v16.16b, v16.16b, v1.16b; /* Checksum_i+0 */ \ eor v3.16b, v3.16b, v7.16b; /* xor Offset_i+2 */ \ eor v16.16b, v16.16b, v2.16b; /* Checksum_i+1 */ \ eor v4.16b, v4.16b, v0.16b; /* xor Offset_i+3 */ \ eor v16.16b, v16.16b, v3.16b; /* Checksum_i+2 */ \ eor v16.16b, v16.16b, v4.16b; /* Checksum_i+3 */ \ st1 {v1.16b-v4.16b}, [x1], #64; \ \ b.hs .Locb_dec_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x6, .Locb_dec_done; \ \ .Locb_dec_loop_##bits: \ \ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ \ /* Checksum_i = Checksum_{i-1} xor P_i */ \ \ rbit w8, w12; \ add w12, w12, #1; \ clz w8, w8; /* ntz(i) */ \ add x8, x5, x8, lsl #4; \ \ 
ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ sub x6, x6, #1; \ eor v0.16b, v0.16b, v2.16b; \ eor v1.16b, v1.16b, v0.16b; \ \ do_aes_one##bits(d, imc, v1, v1) \ \ eor v1.16b, v1.16b, v0.16b; \ st1 {v1.16b}, [x1], #16; /* store plaintext */ \ eor v16.16b, v16.16b, v1.16b; \ \ cbnz x6, .Locb_dec_loop_##bits; \ b .Locb_dec_done; OCB_DEC(128) OCB_DEC(192) OCB_DEC(256) #undef OCB_DEC .Locb_dec_done: aes_clear_keys(w7) st1 {v16.16b}, [x4] /* store checksum */ st1 {v0.16b}, [x3] /* store offset */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) CLEAR_REG(v16) ret -.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce; +ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;) /* * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched, * const unsigned char *abuf, * unsigned char *offset, * unsigned char *checksum, * unsigned char *L_table, * size_t nblocks, * unsigned int nrounds, * unsigned int blkn); */ .align 3 .globl _gcry_aes_ocb_auth_armv8_ce -.type _gcry_aes_ocb_auth_armv8_ce,%function; +ELF(.type _gcry_aes_ocb_auth_armv8_ce,%function;) _gcry_aes_ocb_auth_armv8_ce: /* input: * x0: keysched * x1: abuf * x2: offset => x3 * x3: checksum => x4 * x4: Ltable => x5 * x5: nblocks => x6 (0 < nblocks <= 32) * w6: nrounds => w7 * w7: blkn => w12 */ mov x12, x7 mov x7, x6 mov x6, x5 mov x5, x4 mov x4, x3 mov x3, x2 aes_preload_keys(x0, w7); ld1 {v0.16b}, [x3] /* load offset */ ld1 {v16.16b}, [x4] /* load checksum */ beq .Locb_auth_entry_192 bhi .Locb_auth_entry_256 #define OCB_AUTH(bits) \ .Locb_auth_entry_##bits: \ cmp x6, #4; \ add w12, w12, #1; \ b.lo .Locb_auth_loop_##bits; \ \ .Locb_auth_loop4_##bits: \ \ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ \ add w9, w12, #1; \ add w10, w12, #2; \ add w11, w12, #3; \ rbit w8, w12; \ add w12, w12, #4; \ rbit w9, w9; \ rbit w10, w10; \ rbit w11, w11; \ clz w8, w8; /* ntz(i+0) */ \ clz w9, w9; /* ntz(i+1) */ \ 
clz w10, w10; /* ntz(i+2) */ \ clz w11, w11; /* ntz(i+3) */ \ add x8, x5, x8, lsl #4; \ ld1 {v1.16b-v4.16b}, [x1], #64; /* load A_i+<0-3> */ \ add x9, x5, x9, lsl #4; \ add x10, x5, x10, lsl #4; \ add x11, x5, x11, lsl #4; \ \ sub x6, x6, #4; \ \ ld1 {v5.16b}, [x8]; /* load L_{ntz(i+0)} */ \ ld1 {v6.16b}, [x9]; /* load L_{ntz(i+1)} */ \ ld1 {v7.16b}, [x10]; /* load L_{ntz(i+2)} */ \ eor v5.16b, v5.16b, v0.16b; /* Offset_i+0 */ \ ld1 {v0.16b}, [x11]; /* load L_{ntz(i+3)} */ \ eor v6.16b, v6.16b, v5.16b; /* Offset_i+1 */ \ eor v1.16b, v1.16b, v5.16b; /* A_i+0 xor Offset_i+0 */ \ eor v7.16b, v7.16b, v6.16b; /* Offset_i+2 */ \ eor v2.16b, v2.16b, v6.16b; /* A_i+1 xor Offset_i+1 */ \ eor v0.16b, v0.16b, v7.16b; /* Offset_i+3 */ \ cmp x6, #4; \ eor v3.16b, v3.16b, v7.16b; /* A_i+2 xor Offset_i+2 */ \ eor v4.16b, v4.16b, v0.16b; /* A_i+3 xor Offset_i+3 */ \ \ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v2.16b; \ eor v16.16b, v16.16b, v3.16b; \ eor v1.16b, v1.16b, v4.16b; \ eor v16.16b, v16.16b, v1.16b; \ \ b.hs .Locb_auth_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x6, .Locb_auth_done; \ \ .Locb_auth_loop_##bits: \ \ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ \ \ rbit w8, w12; \ add w12, w12, #1; \ clz w8, w8; /* ntz(i) */ \ add x8, x5, x8, lsl #4; \ \ ld1 {v1.16b}, [x1], #16; /* load aadtext */ \ ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \ sub x6, x6, #1; \ eor v0.16b, v0.16b, v2.16b; \ eor v1.16b, v1.16b, v0.16b; \ \ do_aes_one##bits(e, mc, v1, v1) \ \ eor v16.16b, v16.16b, v1.16b; \ \ cbnz x6, .Locb_auth_loop_##bits; \ b .Locb_auth_done; OCB_AUTH(128) OCB_AUTH(192) OCB_AUTH(256) #undef OCB_AUTH .Locb_auth_done: aes_clear_keys(w7) st1 {v16.16b}, [x4] /* store checksum */ st1 {v0.16b}, [x3] /* store offset */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) CLEAR_REG(v16) ret -.size 
_gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce; +ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;) /* * void _gcry_aes_xts_enc_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *tweak, unsigned int nrounds); */ .align 3 .globl _gcry_aes_xts_enc_armv8_ce -.type _gcry_aes_xts_enc_armv8_ce,%function; +ELF(.type _gcry_aes_xts_enc_armv8_ce,%function;) _gcry_aes_xts_enc_armv8_ce: /* input: * r0: keysched * r1: outbuf * r2: inbuf * r3: tweak * x4: nblocks * w5: nrounds */ cbz x4, .Lxts_enc_skip /* load tweak */ ld1 {v0.16b}, [x3] /* load gfmul mask */ mov x6, #0x87 mov x7, #0x01 mov v16.D[0], x6 mov v16.D[1], x7 aes_preload_keys(x0, w5); b.eq .Lxts_enc_entry_192 b.hi .Lxts_enc_entry_256 #define XTS_ENC(bits) \ .Lxts_enc_entry_##bits: \ cmp x4, #4; \ b.lo .Lxts_enc_loop_##bits; \ \ .Lxts_enc_loop4_##bits: \ \ ext v4.16b, v0.16b, v0.16b, #8; \ \ sshr v2.2d, v4.2d, #63; \ add v5.2d, v0.2d, v0.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v5.16b, v5.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v6.2d, v5.2d, v5.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v6.16b, v6.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v7.2d, v6.2d, v6.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v7.16b, v7.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v3.2d, v7.2d, v7.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v3.16b, v3.16b, v2.16b; \ ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \ st1 {v3.16b}, [x3]; \ sub x4, x4, #4; \ eor v1.16b, v1.16b, v0.16b; \ \ ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \ cmp x4, #4; \ eor v2.16b, v2.16b, v5.16b; \ eor v3.16b, v3.16b, v6.16b; \ eor v4.16b, v4.16b, v7.16b; \ \ do_aes_4_##bits(e, mc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v0.16b; \ ld1 {v0.16b}, [x3]; \ eor v2.16b, v2.16b, v5.16b; \ eor v3.16b, v3.16b, v6.16b; \ eor v4.16b, v4.16b, v7.16b; \ st1 {v1.16b-v4.16b}, 
[x1], #64; /* store plaintext */ \ \ b.hs .Lxts_enc_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x4, .Lxts_enc_done; \ \ .Lxts_enc_loop_##bits: \ \ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ ext v3.16b, v0.16b, v0.16b, #8; \ mov v2.16b, v0.16b; \ sshr v3.2d, v3.2d, #63; \ add v0.2d, v0.2d, v0.2d; \ and v3.16b, v3.16b, v16.16b; \ eor v1.16b, v1.16b, v2.16b; \ eor v0.16b, v0.16b, v3.16b; \ sub x4, x4, #1; \ \ do_aes_one##bits(e, mc, v1, v1); \ \ eor v1.16b, v1.16b, v2.16b; \ st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ \ cbnz x4, .Lxts_enc_loop_##bits; \ b .Lxts_enc_done; XTS_ENC(128) XTS_ENC(192) XTS_ENC(256) #undef XTS_ENC .Lxts_enc_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store tweak */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) .Lxts_enc_skip: ret -.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce; +ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;) /* * void _gcry_aes_xts_dec_armv8_ce (const void *keysched, * unsigned char *outbuf, * const unsigned char *inbuf, * unsigned char *tweak, unsigned int nrounds); */ .align 3 .globl _gcry_aes_xts_dec_armv8_ce -.type _gcry_aes_xts_dec_armv8_ce,%function; +ELF(.type _gcry_aes_xts_dec_armv8_ce,%function;) _gcry_aes_xts_dec_armv8_ce: /* input: * r0: keysched * r1: outbuf * r2: inbuf * r3: tweak * x4: nblocks * w5: nrounds */ cbz x4, .Lxts_dec_skip /* load tweak */ ld1 {v0.16b}, [x3] /* load gfmul mask */ mov x6, #0x87 mov x7, #0x01 mov v16.D[0], x6 mov v16.D[1], x7 aes_preload_keys(x0, w5); b.eq .Lxts_dec_entry_192 b.hi .Lxts_dec_entry_256 #define XTS_DEC(bits) \ .Lxts_dec_entry_##bits: \ cmp x4, #4; \ b.lo .Lxts_dec_loop_##bits; \ \ .Lxts_dec_loop4_##bits: \ \ ext v4.16b, v0.16b, v0.16b, #8; \ \ sshr v2.2d, v4.2d, #63; \ add v5.2d, v0.2d, v0.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v5.16b, v5.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v6.2d, v5.2d, v5.2d; \ and v2.16b, v2.16b, v16.16b; \ 
add v4.2d, v4.2d, v4.2d; \ eor v6.16b, v6.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v7.2d, v6.2d, v6.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v7.16b, v7.16b, v2.16b; \ \ sshr v2.2d, v4.2d, #63; \ add v3.2d, v7.2d, v7.2d; \ and v2.16b, v2.16b, v16.16b; \ add v4.2d, v4.2d, v4.2d; \ eor v3.16b, v3.16b, v2.16b; \ ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \ st1 {v3.16b}, [x3]; \ sub x4, x4, #4; \ eor v1.16b, v1.16b, v0.16b; \ \ ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \ cmp x4, #4; \ eor v2.16b, v2.16b, v5.16b; \ eor v3.16b, v3.16b, v6.16b; \ eor v4.16b, v4.16b, v7.16b; \ \ do_aes_4_##bits(d, imc, v1, v2, v3, v4); \ \ eor v1.16b, v1.16b, v0.16b; \ ld1 {v0.16b}, [x3]; \ eor v2.16b, v2.16b, v5.16b; \ eor v3.16b, v3.16b, v6.16b; \ eor v4.16b, v4.16b, v7.16b; \ st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \ \ b.hs .Lxts_dec_loop4_##bits; \ CLEAR_REG(v3); \ CLEAR_REG(v4); \ CLEAR_REG(v5); \ CLEAR_REG(v6); \ CLEAR_REG(v7); \ cbz x4, .Lxts_dec_done; \ \ .Lxts_dec_loop_##bits: \ \ ld1 {v1.16b}, [x2], #16; /* load plaintext */ \ ext v3.16b, v0.16b, v0.16b, #8; \ mov v2.16b, v0.16b; \ sshr v3.2d, v3.2d, #63; \ add v0.2d, v0.2d, v0.2d; \ and v3.16b, v3.16b, v16.16b; \ eor v1.16b, v1.16b, v2.16b; \ eor v0.16b, v0.16b, v3.16b; \ sub x4, x4, #1; \ \ do_aes_one##bits(d, imc, v1, v1); \ \ eor v1.16b, v1.16b, v2.16b; \ st1 {v1.16b}, [x1], #16; /* store ciphertext */ \ \ cbnz x4, .Lxts_dec_loop_##bits; \ b .Lxts_dec_done; XTS_DEC(128) XTS_DEC(192) XTS_DEC(256) #undef XTS_DEC .Lxts_dec_done: aes_clear_keys(w5) st1 {v0.16b}, [x3] /* store tweak */ CLEAR_REG(v0) CLEAR_REG(v1) CLEAR_REG(v2) .Lxts_dec_skip: ret -.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce; +ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;) /* * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); */ .align 3 .globl _gcry_aes_sbox4_armv8_ce -.type _gcry_aes_sbox4_armv8_ce,%function; +ELF(.type _gcry_aes_sbox4_armv8_ce,%function;) 
_gcry_aes_sbox4_armv8_ce: /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in * Cryptology — CT-RSA 2015" for details. */ movi v0.16b, #0x52 movi v1.16b, #0 mov v0.S[0], w0 aese v0.16b, v1.16b addv s0, v0.4s mov w0, v0.S[0] CLEAR_REG(v0) ret -.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce; +ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;) /* * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src); */ .align 3 .globl _gcry_aes_invmixcol_armv8_ce -.type _gcry_aes_invmixcol_armv8_ce,%function; +ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;) _gcry_aes_invmixcol_armv8_ce: ld1 {v0.16b}, [x1] aesimc v0.16b, v0.16b st1 {v0.16b}, [x0] CLEAR_REG(v0) ret -.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce; +ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;) #endif diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S index ec1810d5..aeb67a12 100644 --- a/cipher/sha1-armv8-aarch64-ce.S +++ b/cipher/sha1-armv8-aarch64-ce.S @@ -1,204 +1,204 @@ /* sha1-armv8-aarch64-ce.S - ARM/CE accelerated SHA-1 transform function * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA1) .cpu generic+simd+crypto .text #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; /* Constants */ #define K1 0x5A827999 #define K2 0x6ED9EBA1 #define K3 0x8F1BBCDC #define K4 0xCA62C1D6 .align 4 gcry_sha1_aarch64_ce_K_VEC: .LK_VEC: .LK1: .long K1, K1, K1, K1 .LK2: .long K2, K2, K2, K2 .LK3: .long K3, K3, K3, K3 .LK4: .long K4, K4, K4, K4 /* Register macros */ #define sH4 s0 #define vH4 v0 #define vH0123 v1 #define qABCD q2 #define sABCD s2 #define vABCD v2 #define sE0 s3 #define vE0 v3 #define sE1 s4 #define vE1 v4 #define vT0 v5 #define vT1 v6 #define vW0 v16 #define vW1 v17 #define vW2 v18 #define vW3 v19 #define vK1 v20 #define vK2 v21 #define vK3 v22 #define vK4 v23 /* Round macros */ #define _(...) /*_*/ #define do_add(dst, src0, src1) add dst.4s, src0.4s, src1.4s; #define do_sha1su0(w0,w1,w2) sha1su0 w0.4s,w1.4s,w2.4s; #define do_sha1su1(w0,w3) sha1su1 w0.4s,w3.4s; #define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, sha1su1_fn) \ sha1su1_fn( v##w3, v##w2 ); \ sha1h e0, sABCD; \ sha1##f qABCD, e1, v##t.4s; \ add_fn( v##t, v##w2, v##k ); \ sha1su0_fn( v##w0, v##w1, v##w2 ); /* Other functional macros */ #define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; /* * unsigned int * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data, * size_t nblks) */ .align 3 .globl _gcry_sha1_transform_armv8_ce -.type _gcry_sha1_transform_armv8_ce,%function; +ELF(.type _gcry_sha1_transform_armv8_ce,%function;) _gcry_sha1_transform_armv8_ce: /* input: * x0: ctx, CTX * x1: data (64*nblks bytes) * x2: nblks */ cbz x2, .Ldo_nothing; GET_DATA_POINTER(x4, .LK_VEC); ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */ ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */ ldr sH4, [x0, #16] /* load h4 */ ld1 {vW0.16b-vW3.16b}, 
[x1], #64 mov vABCD.16b, vH0123.16b rev32 vW0.16b, vW0.16b rev32 vW1.16b, vW1.16b rev32 vW2.16b, vW2.16b do_add(vT0, vW0, vK1) rev32 vW3.16b, vW3.16b do_add(vT1, vW1, vK1) .Loop: do_rounds(c, sE1, sH4, T0, K1, W0, W1, W2, W3, do_add, do_sha1su0, _) sub x2, x2, #1 do_rounds(c, sE0, sE1, T1, K1, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) do_rounds(c, sE1, sE0, T0, K1, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) do_rounds(c, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) do_rounds(c, sE1, sE0, T0, K2, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE0, sE1, T1, K2, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE1, sE0, T0, K2, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE0, sE1, T1, K3, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) do_rounds(m, sE1, sE0, T0, K3, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) do_rounds(m, sE0, sE1, T1, K3, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) do_rounds(m, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, do_sha1su1) do_rounds(m, sE0, sE1, T1, K4, W1, W2, W3, W0, do_add, do_sha1su0, do_sha1su1) do_rounds(m, sE1, sE0, T0, K4, W2, W3, W0, W1, do_add, do_sha1su0, do_sha1su1) do_rounds(p, sE0, sE1, T1, K4, W3, W0, W1, W2, do_add, do_sha1su0, do_sha1su1) cbz x2, .Lend ld1 {vW0.16b-vW1.16b}, [x1], #32 /* preload */ do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1) rev32 vW0.16b, vW0.16b ld1 {vW2.16b}, [x1], #16 rev32 vW1.16b, vW1.16b do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _) ld1 {vW3.16b}, [x1], #16 rev32 vW2.16b, vW2.16b do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _) rev32 vW3.16b, vW3.16b do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _) do_add(vT0, vW0, vK1) add vH4.2s, vH4.2s, vE0.2s add vABCD.4s, vABCD.4s, vH0123.4s do_add(vT1, vW1, vK1) mov 
vH0123.16b, vABCD.16b b .Loop .Lend: do_rounds(p, sE1, sE0, T0, K4, _ , _ , W2, W3, do_add, _, do_sha1su1) do_rounds(p, sE0, sE1, T1, K4, _ , _ , W3, _ , do_add, _, _) do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _) do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _) add vH4.2s, vH4.2s, vE0.2s add vH0123.4s, vH0123.4s, vABCD.4s CLEAR_REG(vW0) CLEAR_REG(vW1) CLEAR_REG(vW2) CLEAR_REG(vW3) CLEAR_REG(vABCD) CLEAR_REG(vE1) CLEAR_REG(vE0) str sH4, [x0, #16] /* store h4 */ st1 {vH0123.4s}, [x0] /* store h0,h1,h2,h3 */ CLEAR_REG(vH0123) CLEAR_REG(vH4) .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce; +ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;) #endif diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S index a4575da4..6b3ad32d 100644 --- a/cipher/sha256-armv8-aarch64-ce.S +++ b/cipher/sha256-armv8-aarch64-ce.S @@ -1,218 +1,218 @@ /* sha256-armv8-aarch64-ce.S - ARM/CE accelerated SHA-256 transform function * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) && \ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA256) .cpu generic+simd+crypto .text #define GET_DATA_POINTER(reg, name) \ adrp reg, :got:name ; \ ldr reg, [reg, #:got_lo12:name] ; /* Constants */ .align 4 gcry_sha256_aarch64_ce_K: .LK: .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 /* Register macros */ #define vH0123 v0 #define vH4567 v1 #define vABCD0 v2 #define qABCD0 q2 #define vABCD1 v3 #define qABCD1 q3 #define vEFGH v4 #define qEFGH q4 #define vT0 v5 #define vT1 v6 #define vW0 v16 #define vW1 v17 #define vW2 v18 #define vW3 v19 #define vK0 v20 #define vK1 v21 #define vK2 v22 #define vK3 v23 /* Round macros */ #define _(...) 
/*_*/ #define do_loadk(nk0, nk1) ld1 {nk0.16b-nk1.16b},[x3],#32; #define do_add(a, b) add a.4s, a.4s, b.4s; #define do_sha256su0(w0, w1) sha256su0 w0.4s, w1.4s; #define do_sha256su1(w0, w2, w3) sha256su1 w0.4s, w2.4s, w3.4s; #define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, su1_fn) \ loadk_fn( v##nk0, v##nk1 ); \ su0_fn( v##w0, v##w1 ); \ mov vABCD1.16b, vABCD0.16b; \ sha256h qABCD0, qEFGH, v##k.4s; \ sha256h2 qEFGH, qABCD1, v##k.4s; \ add_fn( v##nk0, v##w2 ); \ su1_fn( v##w0, v##w2, v##w3 ); /* Other functional macros */ #define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; /* * unsigned int * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data, * size_t num_blks) */ .align 3 .globl _gcry_sha256_transform_armv8_ce -.type _gcry_sha256_transform_armv8_ce,%function; +ELF(.type _gcry_sha256_transform_armv8_ce,%function;) _gcry_sha256_transform_armv8_ce: /* input: * r0: ctx, CTX * r1: data (64*nblks bytes) * r2: nblks */ cbz x2, .Ldo_nothing; GET_DATA_POINTER(x3, .LK); mov x4, x3 ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */ ld1 {vW0.16b-vW1.16b}, [x1], #32 do_loadk(vK0, vK1) ld1 {vW2.16b-vW3.16b}, [x1], #32 mov vABCD0.16b, vH0123.16b mov vEFGH.16b, vH4567.16b rev32 vW0.16b, vW0.16b rev32 vW1.16b, vW1.16b rev32 vW2.16b, vW2.16b do_add(vK0, vW0) rev32 vW3.16b, vW3.16b do_add(vK1, vW1) .Loop: do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) sub x2,x2,#1 do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1) do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, 
do_sha256su1) do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, do_sha256su1) do_rounds(K1, K3, _ , W1, W2, W3, W0, _ , do_add, do_sha256su0, do_sha256su1) do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, do_sha256su1) do_rounds(K3, K1, _ , W3, W0, W1, W2, _ , do_add, do_sha256su0, do_sha256su1) cbz x2, .Lend do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _) ld1 {vW0.16b}, [x1], #16 mov x3, x4 do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _) ld1 {vW1.16b}, [x1], #16 rev32 vW0.16b, vW0.16b do_rounds(K2, K0, K1, W2, _ , W0, _ , do_loadk, do_add, _, _) rev32 vW1.16b, vW1.16b ld1 {vW2.16b}, [x1], #16 do_rounds(K3, K1, _ , W3, _ , W1, _ , _ , do_add, _, _) ld1 {vW3.16b}, [x1], #16 do_add(vH0123, vABCD0) do_add(vH4567, vEFGH) rev32 vW2.16b, vW2.16b mov vABCD0.16b, vH0123.16b rev32 vW3.16b, vW3.16b mov vEFGH.16b, vH4567.16b b .Loop .Lend: do_rounds(K0, K2, K3, W0, _ , W2, W3, do_loadk, do_add, _, _) do_rounds(K1, K3, _ , W1, _ , W3, _ , _ , do_add, _, _) do_rounds(K2, _ , _ , W2, _ , _ , _ , _ , _, _, _) do_rounds(K3, _ , _ , W3, _ , _ , _ , _ , _, _, _) CLEAR_REG(vW0) CLEAR_REG(vW1) CLEAR_REG(vW2) CLEAR_REG(vW3) CLEAR_REG(vK0) CLEAR_REG(vK1) CLEAR_REG(vK2) CLEAR_REG(vK3) do_add(vH0123, vABCD0) do_add(vH4567, vEFGH) CLEAR_REG(vABCD0) CLEAR_REG(vABCD1) CLEAR_REG(vEFGH) st1 {vH0123.4s-vH4567.4s}, [x0] /* store state */ CLEAR_REG(vH0123) CLEAR_REG(vH4567) .Ldo_nothing: mov x0, #0 ret -.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce; +ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;) #endif diff --git a/cipher/twofish-aarch64.S b/cipher/twofish-aarch64.S index 99c46752..adee412d 100644 --- a/cipher/twofish-aarch64.S +++ b/cipher/twofish-aarch64.S @@ -1,317 +1,317 @@ /* twofish-aarch64.S - ARMv8/AArch64 assembly implementation of Twofish cipher * * Copyright (C) 2016 Jussi Kivilinna * * This file is part of Libgcrypt. 
* * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ -#include +#include "asm-common-aarch64.h" #if defined(__AARCH64EL__) #ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS .text /* structure of TWOFISH_context: */ #define s0 0 #define s1 ((s0) + 4 * 256) #define s2 ((s1) + 4 * 256) #define s3 ((s2) + 4 * 256) #define w ((s3) + 4 * 256) #define k ((w) + 4 * 8) /* register macros */ #define CTX x0 #define RDST x1 #define RSRC x2 #define CTXs0 CTX #define CTXs1 x3 #define CTXs2 x4 #define CTXs3 x5 #define CTXw x17 #define RA w6 #define RB w7 #define RC w8 #define RD w9 #define RX w10 #define RY w11 #define xRX x10 #define xRY x11 #define RMASK w12 #define RT0 w13 #define RT1 w14 #define RT2 w15 #define RT3 w16 #define xRT0 x13 #define xRT1 x14 #define xRT2 x15 #define xRT3 x16 /* helper macros */ #ifndef __AARCH64EL__ /* bswap on big-endian */ #define host_to_le(reg) \ rev reg, reg; #define le_to_host(reg) \ rev reg, reg; #else /* nop on little-endian */ #define host_to_le(reg) /*_*/ #define le_to_host(reg) /*_*/ #endif #define ldr_input_aligned_le(rin, a, b, c, d) \ ldr a, [rin, #0]; \ ldr b, [rin, #4]; \ le_to_host(a); \ ldr c, [rin, #8]; \ le_to_host(b); \ ldr d, [rin, #12]; \ le_to_host(c); \ le_to_host(d); #define str_output_aligned_le(rout, a, b, c, d) \ le_to_host(a); \ le_to_host(b); \ str a, [rout, #0]; \ le_to_host(c); \ str b, [rout, #4]; \ le_to_host(d); \ str c, [rout, #8]; \ str d, [rout, #12]; 
/* unaligned word reads/writes allowed */ #define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \ ldr_input_aligned_le(rin, ra, rb, rc, rd) #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ str_output_aligned_le(rout, ra, rb, rc, rd) /********************************************************************** 1-way twofish **********************************************************************/ #define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \ and RT0, RMASK, b, lsr#(8 - 2); \ and RY, RMASK, b, lsr#(16 - 2); \ and RT1, RMASK, b, lsr#(24 - 2); \ ldr RY, [CTXs3, xRY]; \ and RT2, RMASK, b, lsl#(2); \ ldr RT0, [CTXs2, xRT0]; \ and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \ ldr RT1, [CTXs0, xRT1]; \ and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \ ldr RT2, [CTXs1, xRT2]; \ ldr RX, [CTXs1, xRX]; \ ror_a(a); \ \ eor RY, RY, RT0; \ ldr RT3, [CTXs2, xRT3]; \ and RT0, RMASK, a, lsl#(2); \ eor RY, RY, RT1; \ and RT1, RMASK, a, lsr#(24 - 2); \ eor RY, RY, RT2; \ ldr RT0, [CTXs0, xRT0]; \ eor RX, RX, RT3; \ ldr RT1, [CTXs3, xRT1]; \ eor RX, RX, RT0; \ \ ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ eor RX, RX, RT1; \ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ \ add RT0, RX, RY, lsl #1; \ add RX, RX, RY; \ add RT0, RT0, RT3; \ add RX, RX, RT2; \ eor rd, RT0, rd, ror #31; \ eor rc, rc, RX; #define dummy(x) /*_*/ #define ror1(r) \ ror r, r, #1; #define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \ and RT3, RMASK, b, lsl#(2 - (adj_b)); \ and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \ ror_b(b); \ and RT2, RMASK, a, lsl#(2); \ and RT0, RMASK, a, lsr#(8 - 2); \ \ ldr RY, [CTXs1, xRT3]; \ ldr RX, [CTXs0, xRT2]; \ and RT3, RMASK, b, lsr#(16 - 2); \ ldr RT1, [CTXs2, xRT1]; \ and RT2, RMASK, a, lsr#(16 - 2); \ ldr RT0, [CTXs1, xRT0]; \ \ ldr RT3, [CTXs3, xRT3]; \ eor RY, RY, RT1; \ \ and RT1, RMASK, b, lsr#(24 - 2); \ eor RX, RX, RT0; \ ldr RT2, [CTXs2, xRT2]; \ and RT0, RMASK, a, lsr#(24 - 2); \ \ ldr RT1, [CTXs0, xRT1]; \ \ eor RY, RY, RT3; \ ldr RT0, [CTXs3, xRT0]; \ eor RX, RX, RT2; \ eor RY, 
RY, RT1; \ \ ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \ eor RX, RX, RT0; \ ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \ \ add RT0, RX, RY, lsl #1; \ add RX, RX, RY; \ add RT0, RT0, RT1; \ add RX, RX, RT2; \ eor rd, rd, RT0; \ eor rc, RX, rc, ror #31; #define first_encrypt_cycle(nc) \ encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); #define encrypt_cycle(nc) \ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); #define last_encrypt_cycle(nc) \ encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ ror1(RA); #define first_decrypt_cycle(nc) \ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); #define decrypt_cycle(nc) \ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); #define last_decrypt_cycle(nc) \ decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \ decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \ ror1(RD); .globl _gcry_twofish_arm_encrypt_block -.type _gcry_twofish_arm_encrypt_block,%function; +ELF(.type _gcry_twofish_arm_encrypt_block,%function;) _gcry_twofish_arm_encrypt_block: /* input: * x0: ctx * x1: dst * x2: src */ add CTXw, CTX, #(w); ldr_input_le(RSRC, RA, RB, RC, RD, RT0); /* Input whitening */ ldp RT0, RT1, [CTXw, #(0*8)]; ldp RT2, RT3, [CTXw, #(1*8)]; add CTXs3, CTX, #(s3); add CTXs2, CTX, #(s2); add CTXs1, CTX, #(s1); mov RMASK, #(0xff << 2); eor RA, RA, RT0; eor RB, RB, RT1; eor RC, RC, RT2; eor RD, RD, RT3; first_encrypt_cycle(0); encrypt_cycle(1); encrypt_cycle(2); encrypt_cycle(3); encrypt_cycle(4); encrypt_cycle(5); encrypt_cycle(6); last_encrypt_cycle(7); /* Output whitening */ ldp RT0, RT1, [CTXw, #(2*8)]; ldp RT2, RT3, [CTXw, #(3*8)]; eor RC, RC, RT0; eor RD, RD, RT1; eor RA, RA, RT2; eor RB, RB, RT3; str_output_le(RDST, RC, RD, RA, RB, RT0, RT1); ret; .ltorg -.size 
_gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; +ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;) .globl _gcry_twofish_arm_decrypt_block -.type _gcry_twofish_arm_decrypt_block,%function; +ELF(.type _gcry_twofish_arm_decrypt_block,%function;) _gcry_twofish_arm_decrypt_block: /* input: * %r0: ctx * %r1: dst * %r2: src */ add CTXw, CTX, #(w); ldr_input_le(RSRC, RC, RD, RA, RB, RT0); /* Input whitening */ ldp RT0, RT1, [CTXw, #(2*8)]; ldp RT2, RT3, [CTXw, #(3*8)]; add CTXs3, CTX, #(s3); add CTXs2, CTX, #(s2); add CTXs1, CTX, #(s1); mov RMASK, #(0xff << 2); eor RC, RC, RT0; eor RD, RD, RT1; eor RA, RA, RT2; eor RB, RB, RT3; first_decrypt_cycle(7); decrypt_cycle(6); decrypt_cycle(5); decrypt_cycle(4); decrypt_cycle(3); decrypt_cycle(2); decrypt_cycle(1); last_decrypt_cycle(0); /* Output whitening */ ldp RT0, RT1, [CTXw, #(0*8)]; ldp RT2, RT3, [CTXw, #(1*8)]; eor RA, RA, RT0; eor RB, RB, RT1; eor RC, RC, RT2; eor RD, RD, RT3; str_output_le(RDST, RA, RB, RC, RD, RT0, RT1); ret; -.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; +ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;) #endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ #endif /*__AARCH64EL__*/ diff --git a/configure.ac b/configure.ac index b5d72111..330485f0 100644 --- a/configure.ac +++ b/configure.ac @@ -1,2731 +1,2727 @@ # Configure.ac script for Libgcrypt # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, # 2007, 2008, 2009, 2011 Free Software Foundation, Inc. # Copyright (C) 2012-2017 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. 
# # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # (Process this file with autoconf to produce a configure script.) AC_REVISION($Revision$) AC_PREREQ(2.60) min_automake_version="1.14" # To build a release you need to create a tag with the version number # (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force". Please # bump the version number immediately after the release and do another # commit and push so that the git magic is able to work. See below # for the LT versions. m4_define(mym4_version_major, [1]) m4_define(mym4_version_minor, [9]) m4_define(mym4_version_micro, [0]) # Below is m4 magic to extract and compute the revision number, the # decimalized short revision number, a beta version string, and a flag # indicating a development version (mym4_isgit). Note that the m4 # processing is done by autoconf and not during the configure run. m4_define(mym4_version, [mym4_version_major.mym4_version_minor.mym4_version_micro]) m4_define([mym4_revision], m4_esyscmd([git rev-parse --short HEAD | tr -d '\n\r'])) m4_define([mym4_revision_dec], m4_esyscmd_s([echo $((0x$(echo ]mym4_revision[|head -c 4)))])) m4_define([mym4_betastring], m4_esyscmd_s([git describe --match 'libgcrypt-[0-9].*[0-9]' --long|\ awk -F- '$3!=0{print"-beta"$3}'])) m4_define([mym4_isgit],m4_if(mym4_betastring,[],[no],[yes])) m4_define([mym4_full_version],[mym4_version[]mym4_betastring]) AC_INIT([libgcrypt],[mym4_full_version],[http://bugs.gnupg.org]) # LT Version numbers, remember to change them just *before* a release. 
# (Interfaces removed: CURRENT++, AGE=0, REVISION=0) # (Interfaces added: CURRENT++, AGE++, REVISION=0) # (No interfaces changed: REVISION++) LIBGCRYPT_LT_CURRENT=23 LIBGCRYPT_LT_AGE=3 LIBGCRYPT_LT_REVISION=0 # If the API is changed in an incompatible way: increment the next counter. # # 1.6: ABI and API change but the change is to most users irrelevant # and thus the API version number has not been incremented. LIBGCRYPT_CONFIG_API_VERSION=1 # If you change the required gpg-error version, please remove # unnecessary error code defines in src/gcrypt-int.h. NEED_GPG_ERROR_VERSION=1.25 PACKAGE=$PACKAGE_NAME VERSION=$PACKAGE_VERSION AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_SRCDIR([src/libgcrypt.vers]) AM_INIT_AUTOMAKE([serial-tests dist-bzip2]) AC_CONFIG_HEADER(config.h) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_LIBOBJ_DIR([compat]) AC_CANONICAL_HOST AM_MAINTAINER_MODE AM_SILENT_RULES AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory]) AH_TOP([ #ifndef _GCRYPT_CONFIG_H_INCLUDED #define _GCRYPT_CONFIG_H_INCLUDED /* Enable gpg-error's strerror macro for W32CE. */ #define GPG_ERR_ENABLE_ERRNO_MACROS 1 ]) AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK # if defined(__BIG_ENDIAN__) # define WORDS_BIGENDIAN 1 # elif defined(__LITTLE_ENDIAN__) # undef WORDS_BIGENDIAN # else # error "No endianness found" # endif #endif /*DISABLED_ENDIAN_CHECK*/ /* We basically use the original Camellia source. Make sure the symbols properly prefixed. */ #define CAMELLIA_EXT_SYM_PREFIX _gcry_ #endif /*_GCRYPT_CONFIG_H_INCLUDED*/ ]) AH_VERBATIM([_REENTRANT], [/* To allow the use of Libgcrypt in multithreaded programs we have to use special features from the library. 
*/ #ifndef _REENTRANT # define _REENTRANT 1 #endif ]) AC_SUBST(LIBGCRYPT_LT_CURRENT) AC_SUBST(LIBGCRYPT_LT_AGE) AC_SUBST(LIBGCRYPT_LT_REVISION) AC_SUBST(PACKAGE) AC_SUBST(VERSION) AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of this package]) AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version of this package]) VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_version_major \ mym4_version_minor mym4_version_micro) AC_SUBST(VERSION_NUMBER) ###################### ## Basic checks. ### (we need some results later on (e.g. $GCC) ###################### AC_PROG_MAKE_SET missing_dir=`cd $ac_aux_dir && pwd` AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) # AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir) AC_PROG_CC AC_PROG_CPP AM_PROG_CC_C_O AM_PROG_AS AC_ISC_POSIX AC_PROG_INSTALL AC_PROG_AWK AC_GNU_SOURCE # We need to compile and run a program on the build machine. A # comment in libgpg-error says that the AC_PROG_CC_FOR_BUILD macro in # the AC archive is broken for autoconf 2.57. Given that there is no # newer version of that macro, we assume that it is also broken for # autoconf 2.61 and thus we use a simple but usually sufficient # approach. AC_MSG_CHECKING(for cc for build) if test "$cross_compiling" = "yes"; then CC_FOR_BUILD="${CC_FOR_BUILD-cc}" else CC_FOR_BUILD="${CC_FOR_BUILD-$CC}" fi AC_MSG_RESULT($CC_FOR_BUILD) AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler]) LT_PREREQ([2.2.6]) LT_INIT([win32-dll disable-static]) LT_LANG([Windows Resource]) ########################## ## General definitions. ## ########################## # Used by libgcrypt-config LIBGCRYPT_CONFIG_LIBS="-lgcrypt" LIBGCRYPT_CONFIG_CFLAGS="" LIBGCRYPT_CONFIG_HOST="$host" # Definitions for symmetric ciphers. 
available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed" available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20" enabled_ciphers="" # Definitions for public-key ciphers. available_pubkey_ciphers="dsa elgamal rsa ecc" enabled_pubkey_ciphers="" # Definitions for message digests. available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512" available_digests="$available_digests sha3 tiger whirlpool stribog blake2" available_digests="$available_digests sm3" enabled_digests="" # Definitions for kdfs (optional ones) available_kdfs="s2k pkdf2 scrypt" enabled_kdfs="" # Definitions for random modules. available_random_modules="linux egd unix" auto_random_modules="$available_random_modules" # Supported thread backends. LIBGCRYPT_THREAD_MODULES="" # Other definitions. have_w32_system=no have_w32ce_system=no have_pthread=no # Setup some stuff depending on host. case "${host}" in *-*-mingw32*) ac_cv_have_dev_random=no have_w32_system=yes case "${host}" in *-mingw32ce*) have_w32ce_system=yes available_random_modules="w32ce" ;; *) available_random_modules="w32" ;; esac AC_DEFINE(USE_ONLY_8DOT3,1, [set this to limit filenames to the 8.3 format]) AC_DEFINE(HAVE_DRIVE_LETTERS,1, [defined if we must run on a stupid file system]) AC_DEFINE(HAVE_DOSISH_SYSTEM,1, [defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2) with special properties like no file modes]) ;; i?86-emx-os2 | i?86-*-os2*emx) # OS/2 with the EMX environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; i?86-*-msdosdjgpp*) # DOS with the DJGPP environment ac_cv_have_dev_random=no AC_DEFINE(HAVE_DRIVE_LETTERS) AC_DEFINE(HAVE_DOSISH_SYSTEM) ;; *-*-hpux*) if test -z "$GCC" ; then CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE" fi ;; *-dec-osf4*) if test -z "$GCC" ; then # Suppress all warnings # to get rid of the unsigned/signed char mismatch warnings. 
CFLAGS="$CFLAGS -w" fi ;; m68k-atari-mint) ;; *-apple-darwin*) AC_DEFINE(_DARWIN_C_SOURCE, 900000L, Expose all libc features (__DARWIN_C_FULL).) ;; *) ;; esac if test "$have_w32_system" = yes; then AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system]) if test "$have_w32ce_system" = yes; then AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE]) fi fi AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes) AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes) # A printable OS Name is sometimes useful. case "${host}" in *-*-mingw32ce*) PRINTABLE_OS_NAME="W32CE" ;; *-*-mingw32*) PRINTABLE_OS_NAME="W32" ;; i?86-emx-os2 | i?86-*-os2*emx ) PRINTABLE_OS_NAME="OS/2" ;; i?86-*-msdosdjgpp*) PRINTABLE_OS_NAME="MSDOS/DJGPP" ;; *-linux*) PRINTABLE_OS_NAME="GNU/Linux" ;; *) PRINTABLE_OS_NAME=`uname -s || echo "Unknown"` ;; esac NAME_OF_DEV_RANDOM="/dev/random" NAME_OF_DEV_URANDOM="/dev/urandom" AC_ARG_ENABLE(endian-check, AC_HELP_STRING([--disable-endian-check], [disable the endian check and trust the OS provided macros]), endiancheck=$enableval,endiancheck=yes) if test x"$endiancheck" = xyes ; then AC_C_BIGENDIAN else AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness]) fi AC_CHECK_SIZEOF(unsigned short, 2) AC_CHECK_SIZEOF(unsigned int, 4) AC_CHECK_SIZEOF(unsigned long, 4) AC_CHECK_SIZEOF(unsigned long long, 0) AC_CHECK_SIZEOF(void *, 0) AC_TYPE_UINTPTR_T if test "$ac_cv_sizeof_unsigned_short" = "0" \ || test "$ac_cv_sizeof_unsigned_int" = "0" \ || test "$ac_cv_sizeof_unsigned_long" = "0"; then AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]); fi # Ensure that we have UINT64_C before we bother to check for uint64_t AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works], AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[uint64_t foo=UINT64_C(42);]])], gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no)) if test "$gnupg_cv_uint64_c_works" = "yes" ; then 
AC_CHECK_SIZEOF(uint64_t) fi # Do we have any 64-bit data types? if test "$ac_cv_sizeof_unsigned_int" != "8" \ && test "$ac_cv_sizeof_unsigned_long" != "8" \ && test "$ac_cv_sizeof_unsigned_long_long" != "8" \ && test "$ac_cv_sizeof_uint64_t" != "8"; then AC_MSG_ERROR([[ *** *** No 64-bit integer type available. *** It is not possible to build Libgcrypt on this platform. ***]]) fi # If not specified otherwise, all available algorithms will be # included. default_ciphers="$available_ciphers" default_pubkey_ciphers="$available_pubkey_ciphers" default_digests="$available_digests" default_kdfs="$available_kdfs" # Blacklist MD2 by default default_digests=`echo $default_digests | sed -e 's/md2//g'` # Substitutions to set generated files in a Emacs buffer to read-only. AC_SUBST(emacs_local_vars_begin, ['Local Variables:']) AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t']) AC_SUBST(emacs_local_vars_end, ['End:']) ############################ ## Command line switches. ## ############################ # Implementation of the --enable-ciphers switch. AC_ARG_ENABLE(ciphers, AC_HELP_STRING([--enable-ciphers=ciphers], [select the symmetric ciphers to include]), [enabled_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`], [enabled_ciphers=""]) if test "x$enabled_ciphers" = "x" \ -o "$enabled_ciphers" = "yes" \ -o "$enabled_ciphers" = "no"; then enabled_ciphers=$default_ciphers fi AC_MSG_CHECKING([which symmetric ciphers to include]) for cipher in $enabled_ciphers; do LIST_MEMBER($cipher, $available_ciphers) if test "$found" = "0"; then AC_MSG_ERROR([unsupported cipher "$cipher" specified]) fi done AC_MSG_RESULT([$enabled_ciphers]) # Implementation of the --enable-pubkey-ciphers switch. 
# The option value is split on "," and ":" and folded to lower case.
AC_ARG_ENABLE(pubkey-ciphers,
              AS_HELP_STRING([--enable-pubkey-ciphers=ciphers],
                             [select the public-key ciphers to include]),
              [enabled_pubkey_ciphers=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
              [enabled_pubkey_ciphers=""])
# An empty value, or a bare "yes" or "no", selects the default set.
if test "x$enabled_pubkey_ciphers" = "x" \
   -o "$enabled_pubkey_ciphers" = "yes"  \
   -o "$enabled_pubkey_ciphers" = "no"; then
   enabled_pubkey_ciphers=$default_pubkey_ciphers
fi
AC_MSG_CHECKING([which public-key ciphers to include])
# Reject any requested name that is not in the list of available ones.
# NOTE(review): LIST_MEMBER is defined outside this chunk; it is assumed
# to leave its result in $found, as the test below relies on that.
for cipher in $enabled_pubkey_ciphers; do
    LIST_MEMBER($cipher, $available_pubkey_ciphers)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported public-key cipher "$cipher" specified])
    fi
done
AC_MSG_RESULT([$enabled_pubkey_ciphers])

# Implementation of the --enable-digests switch.
AC_ARG_ENABLE(digests,
              AS_HELP_STRING([--enable-digests=digests],
                             [select the message digests to include]),
              [enabled_digests=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
              [enabled_digests=""])
# An empty value, or a bare "yes" or "no", selects the default set.
if test "x$enabled_digests" = "x" \
   -o "$enabled_digests" = "yes"  \
   -o "$enabled_digests" = "no"; then
   enabled_digests=$default_digests
fi
AC_MSG_CHECKING([which message digests to include])
for digest in $enabled_digests; do
    LIST_MEMBER($digest, $available_digests)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported message digest "$digest" specified])
    fi
done
AC_MSG_RESULT([$enabled_digests])

# Implementation of the --enable-kdfs switch.
# The help text names the option exactly as AC_ARG_ENABLE registers it.
AC_ARG_ENABLE(kdfs,
              AS_HELP_STRING([--enable-kdfs=kdfs],
                             [select the KDFs to include]),
              [enabled_kdfs=`echo $enableval | tr ',:' ' ' | tr '[A-Z]' '[a-z]'`],
              [enabled_kdfs=""])
# An empty value, or a bare "yes" or "no", selects the default set.
if test "x$enabled_kdfs" = "x" \
   -o "$enabled_kdfs" = "yes"  \
   -o "$enabled_kdfs" = "no"; then
   enabled_kdfs=$default_kdfs
fi
AC_MSG_CHECKING([which key derivation functions to include])
for kdf in $enabled_kdfs; do
    LIST_MEMBER($kdf, $available_kdfs)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported key derivation function "$kdf" specified])
    fi
done
AC_MSG_RESULT([$enabled_kdfs])

# Implementation of the --enable-random switch.
# The requested module name is folded to lower case.  When the switch is
# not given, $random is left untouched.
AC_ARG_ENABLE(random,
              AS_HELP_STRING([--enable-random=name],
                             [select which random number generator to use]),
              [random=`echo $enableval | tr '[A-Z]' '[a-z]'`],
              [])
# An empty value, or a bare "yes" or "no", means "default".
if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then
    random=default
fi

AC_MSG_CHECKING([which random module to use])
# "default" and "auto" are accepted as is; any other name must be one of
# the modules available for this host.
# NOTE(review): LIST_MEMBER is defined outside this chunk; it is assumed
# to leave its result in $found, as the test below relies on that.
if test "$random" != "default" -a "$random" != "auto"; then
    LIST_MEMBER($random, $available_random_modules)
    if test "$found" = "0"; then
       AC_MSG_ERROR([unsupported random module specified])
    fi
fi
AC_MSG_RESULT($random)

# Implementation of the --disable-dev-random switch.
AC_MSG_CHECKING([whether use of /dev/random is requested])
AC_ARG_ENABLE(dev-random,
              AS_HELP_STRING([--disable-dev-random],
                             [disable the use of dev random]),
              try_dev_random=$enableval, try_dev_random=yes)
AC_MSG_RESULT($try_dev_random)

# Implementation of the --with-egd-socket switch.
# An empty name is recorded when the switch is not given.
AC_ARG_WITH(egd-socket,
            AS_HELP_STRING([--with-egd-socket=NAME],
                           [Use NAME for the EGD socket]),
            egd_socket_name="$withval", egd_socket_name="" )
AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name",
                   [Define if you don't want the default EGD socket name.
                    For details see cipher/rndegd.c])

# Implementation of the --enable-random-daemon
AC_MSG_CHECKING([whether the experimental random daemon is requested])
AC_ARG_ENABLE([random-daemon],
              AS_HELP_STRING([--enable-random-daemon],
                             [Build and support the experimental gcryptrnd]),
              [use_random_daemon=$enableval],
              [use_random_daemon=no])
AC_MSG_RESULT($use_random_daemon)
if test x$use_random_daemon = xyes ; then
    AC_DEFINE(USE_RANDOM_DAEMON,1,
              [Define to support the experimental random daemon])
fi
AM_CONDITIONAL(USE_RANDOM_DAEMON, test x$use_random_daemon = xyes)

# Implementation of --disable-asm.
AC_MSG_CHECKING([whether MPI assembler modules are requested])
AC_ARG_ENABLE([asm],
              AS_HELP_STRING([--disable-asm],
                             [Disable MPI assembler modules]),
              [try_asm_modules=$enableval],
              [try_asm_modules=yes])
AC_MSG_RESULT($try_asm_modules)

# Implementation of the --enable-m-guard switch.
AC_MSG_CHECKING([whether memory guard is requested])
AC_ARG_ENABLE(m-guard,
              AS_HELP_STRING([--enable-m-guard],
                             [Enable memory guard facility]),
              [use_m_guard=$enableval], [use_m_guard=no])
AC_MSG_RESULT($use_m_guard)
if test "$use_m_guard" = yes ; then
    AC_DEFINE(M_GUARD,1,[Define to use the (obsolete) malloc guarding feature])
fi

# Implementation of the --enable-large-data-tests switch.
# The result is exported to the generated files as RUN_LARGE_DATA_TESTS.
AC_MSG_CHECKING([whether to run large data tests])
AC_ARG_ENABLE(large-data-tests,
              AS_HELP_STRING([--enable-large-data-tests],
                             [Enable the really long running large data tests]),
              large_data_tests=$enableval,large_data_tests=no)
AC_MSG_RESULT($large_data_tests)
AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests)

# Implementation of the --with-capabilities switch.
# Check whether we want to use Linux capabilities
AC_MSG_CHECKING([whether use of capabilities is requested])
AC_ARG_WITH(capabilities,
            AS_HELP_STRING([--with-capabilities],
                           [Use linux capabilities [default=no]]),
            [use_capabilities="$withval"],[use_capabilities=no])
AC_MSG_RESULT($use_capabilities)

# Implementation of the --enable-hmac-binary-check.
AC_MSG_CHECKING([whether a HMAC binary check is requested])
AC_ARG_ENABLE(hmac-binary-check,
              AS_HELP_STRING([--enable-hmac-binary-check],
                             [Enable library integrity check]),
              [use_hmac_binary_check=$enableval],
              [use_hmac_binary_check=no])
AC_MSG_RESULT($use_hmac_binary_check)
if test "$use_hmac_binary_check" = yes ; then
    AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1,
              [Define to support an HMAC based integrity check])
fi

# Implementation of the --disable-jent-support switch.
AC_MSG_CHECKING([whether jitter entropy support is requested])
AC_ARG_ENABLE(jent-support,
              AS_HELP_STRING([--disable-jent-support],
                             [Disable support for the Jitter entropy collector]),
              jentsupport=$enableval,jentsupport=yes)
AC_MSG_RESULT($jentsupport)

# Implementation of the --disable-padlock-support switch.
# Each switch in this group defaults to "yes" and only records the user's
# choice in a shell variable; nothing else is done with it in this group.
AC_MSG_CHECKING([whether padlock support is requested])
AC_ARG_ENABLE(padlock-support,
              AS_HELP_STRING([--disable-padlock-support],
                             [Disable support for the PadLock Engine of VIA processors]),
              padlocksupport=$enableval,padlocksupport=yes)
AC_MSG_RESULT($padlocksupport)

# Implementation of the --disable-aesni-support switch.
AC_MSG_CHECKING([whether AESNI support is requested])
AC_ARG_ENABLE(aesni-support,
              AS_HELP_STRING([--disable-aesni-support],
                             [Disable support for the Intel AES-NI instructions]),
              aesnisupport=$enableval,aesnisupport=yes)
AC_MSG_RESULT($aesnisupport)

# Implementation of the --disable-shaext-support switch.
AC_MSG_CHECKING([whether SHAEXT support is requested])
AC_ARG_ENABLE(shaext-support,
              AS_HELP_STRING([--disable-shaext-support],
                             [Disable support for the Intel SHAEXT instructions]),
              shaextsupport=$enableval,shaextsupport=yes)
AC_MSG_RESULT($shaextsupport)

# Implementation of the --disable-pclmul-support switch.
AC_MSG_CHECKING([whether PCLMUL support is requested])
AC_ARG_ENABLE(pclmul-support,
              AS_HELP_STRING([--disable-pclmul-support],
                             [Disable support for the Intel PCLMUL instructions]),
              pclmulsupport=$enableval,pclmulsupport=yes)
AC_MSG_RESULT($pclmulsupport)

# Implementation of the --disable-sse41-support switch.
AC_MSG_CHECKING([whether SSE4.1 support is requested])
AC_ARG_ENABLE(sse41-support,
              AS_HELP_STRING([--disable-sse41-support],
                             [Disable support for the Intel SSE4.1 instructions]),
              sse41support=$enableval,sse41support=yes)
AC_MSG_RESULT($sse41support)

# Implementation of the --disable-drng-support switch.
AC_MSG_CHECKING([whether DRNG support is requested])
AC_ARG_ENABLE(drng-support,
              AS_HELP_STRING([--disable-drng-support],
                             [Disable support for the Intel DRNG (RDRAND instruction)]),
              drngsupport=$enableval,drngsupport=yes)
AC_MSG_RESULT($drngsupport)

# Implementation of the --disable-avx-support switch.
# The four hardware feature switches below default to "yes" and only
# record the user's choice in a shell variable.
AC_MSG_CHECKING([whether AVX support is requested])
AC_ARG_ENABLE(avx-support,
              AS_HELP_STRING([--disable-avx-support],
                             [Disable support for the Intel AVX instructions]),
              avxsupport=$enableval,avxsupport=yes)
AC_MSG_RESULT($avxsupport)

# Implementation of the --disable-avx2-support switch.
AC_MSG_CHECKING([whether AVX2 support is requested])
AC_ARG_ENABLE(avx2-support,
              AS_HELP_STRING([--disable-avx2-support],
                             [Disable support for the Intel AVX2 instructions]),
              avx2support=$enableval,avx2support=yes)
AC_MSG_RESULT($avx2support)

# Implementation of the --disable-neon-support switch.
AC_MSG_CHECKING([whether NEON support is requested])
AC_ARG_ENABLE(neon-support,
              AS_HELP_STRING([--disable-neon-support],
                             [Disable support for the ARM NEON instructions]),
              neonsupport=$enableval,neonsupport=yes)
AC_MSG_RESULT($neonsupport)

# Implementation of the --disable-arm-crypto-support switch.
AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested])
AC_ARG_ENABLE(arm-crypto-support,
              AS_HELP_STRING([--disable-arm-crypto-support],
                             [Disable support for the ARMv8 Crypto Extension instructions]),
              armcryptosupport=$enableval,armcryptosupport=yes)
AC_MSG_RESULT($armcryptosupport)

# Implementation of the --disable-O-flag-munging switch.
# The choice is also exposed to Automake as the ENABLE_O_FLAG_MUNGING
# conditional.
AC_MSG_CHECKING([whether a -O flag munging is requested])
AC_ARG_ENABLE([O-flag-munging],
              AS_HELP_STRING([--disable-O-flag-munging],
                             [Disable modification of the cc -O flag]),
              [enable_o_flag_munging=$enableval],
              [enable_o_flag_munging=yes])
AC_MSG_RESULT($enable_o_flag_munging)
AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes")

# Implementation of the --disable-amd64-as-feature-detection switch.
# Defaults to yes; the chosen value is kept in amd64_as_feature_detection.
AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection])
AC_ARG_ENABLE(amd64-as-feature-detection,
              AS_HELP_STRING([--disable-amd64-as-feature-detection],
                 [Disable the auto-detection of AMD64 as(1) features]),
              amd64_as_feature_detection=$enableval,
              amd64_as_feature_detection=yes)
AC_MSG_RESULT($amd64_as_feature_detection)


AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME",
                   [A human readable text with the name of the OS])

# For some systems we know that we have ld_version scripts.
# Use it then as default.
have_ld_version_script=no
case "${host}" in
    *-*-linux*)
        have_ld_version_script=yes
        ;;
    *-*-gnu*)
        have_ld_version_script=yes
        ;;
esac
# An explicit --enable/--disable-ld-version-script overrides the host
# based default; without the option the default is left untouched.
AC_ARG_ENABLE([ld-version-script],
              AS_HELP_STRING([--enable-ld-version-script],
                             [enable/disable use of linker version script. (default is system dependent)]),
              [have_ld_version_script=$enableval],
              [ : ] )
AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes")

AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM",
                   [defined to the name of the strong random device])
AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM",
                   [defined to the name of the weaker random device])


###############################
#### Checks for libraries. ####
###############################

#
# gpg-error is required.
#
AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION")
if test "x$GPG_ERROR_LIBS" = "x"; then
  AC_MSG_ERROR([libgpg-error is needed. See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .])
fi

AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT,
          [The default error source for libgcrypt.])

#
# Check whether the GNU Pth library is available.  We require this
# to build the optional gcryptrnd program.
#
AC_ARG_WITH(pth-prefix,
            AS_HELP_STRING([--with-pth-prefix=PFX],
                           [prefix where GNU Pth is installed (optional)]),
     pth_config_prefix="$withval", pth_config_prefix="")
# Quoted so that an empty or space-containing prefix cannot break the test.
if test "x$pth_config_prefix" != x ; then
   PTH_CONFIG="$pth_config_prefix/bin/pth-config"
fi
# Pth is only looked for when the random daemon was requested.
if test "$use_random_daemon" = "yes"; then
  AC_PATH_PROG(PTH_CONFIG, pth-config, no)
  if test "$PTH_CONFIG" = "no"; then
    AC_MSG_WARN([[
***
*** To build the Libgcrypt's random number daemon
*** we need the support of the GNU Portable Threads Library.
*** Download it from ftp://ftp.gnu.org/gnu/pth/
*** On a Debian GNU/Linux system you might want to try
*** apt-get install libpth-dev
***]])
  else
    GNUPG_PTH_VERSION_CHECK([1.3.7])
    if test "$have_pth" = yes; then
       PTH_CFLAGS=`$PTH_CONFIG --cflags`
       PTH_LIBS=`$PTH_CONFIG --ldflags`
       PTH_LIBS="$PTH_LIBS `$PTH_CONFIG --libs --all`"
       AC_DEFINE(USE_GNU_PTH, 1,
                [Defined if the GNU Portable Thread Library should be used])
       AC_DEFINE(HAVE_PTH, 1,
                [Defined if the GNU Pth is available])
    fi
  fi
fi
AC_SUBST(PTH_CFLAGS)
AC_SUBST(PTH_LIBS)

#
# Check whether pthreads is available
#
if test "$have_w32_system" != yes; then
  AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes)
  if test "$have_pthread" = yes; then
    AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.])
  fi
fi


# Solaris needs -lsocket and -lnsl. Unisys system includes
# gethostbyname in libsocket but needs libnsl for socket.
AC_SEARCH_LIBS(setsockopt, [socket], ,
        [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])])
AC_SEARCH_LIBS(setsockopt, [nsl])

##################################
#### Checks for header files. ####
##################################

AC_HEADER_STDC
AC_CHECK_HEADERS(unistd.h sys/select.h sys/msg.h)
# The substitution stays empty unless sys/select.h was found; otherwise
# it carries the include directive text for that header.
INSERT_SYS_SELECT_H=
if test x"$ac_cv_header_sys_select_h" = xyes; then
  INSERT_SYS_SELECT_H=" include <sys/select.h>"
fi
AC_SUBST(INSERT_SYS_SELECT_H)


##########################################
#### Checks for typedefs, structures, ####
#### and compiler characteristics.
####
##########################################

AC_C_CONST
AC_C_INLINE
AC_TYPE_SIZE_T
AC_TYPE_SIGNAL
AC_DECL_SYS_SIGLIST
AC_TYPE_PID_T

GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF)
GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF)
GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF)
GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF)
GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF)

# Choose the typedef text substituted as FALLBACK_SOCKLEN_T: int on
# mingw32 hosts, otherwise socklen_t or the equivalent type recorded
# in gl_cv_socklen_t_equiv when that variable is non-empty.
gl_TYPE_SOCKLEN_T
case "${host}" in
  *-*-mingw32*)
    # socklen_t may or may not be defined depending on what headers
    # are included.  To be safe we use int as this is the actual type.
    FALLBACK_SOCKLEN_T="typedef int gcry_socklen_t;"
    ;;
  *)
    if test ".$gl_cv_socklen_t_equiv" = "."; then
      FALLBACK_SOCKLEN_T="typedef socklen_t gcry_socklen_t;"
    else
      FALLBACK_SOCKLEN_T="typedef ${gl_cv_socklen_t_equiv} gcry_socklen_t;"
    fi
esac
AC_SUBST(FALLBACK_SOCKLEN_T)


#
# Check for __builtin_bswap32 intrinsic.
# The probe is a link test; the cached result defaults to no.
#
AC_CACHE_CHECK(for __builtin_bswap32,
       [gcry_cv_have_builtin_bswap32],
       [gcry_cv_have_builtin_bswap32=no
        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
          [int x = 0; int y = __builtin_bswap32(x); return y;])],
          [gcry_cv_have_builtin_bswap32=yes])])
if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then
   AC_DEFINE(HAVE_BUILTIN_BSWAP32,1,
             [Defined if compiler has '__builtin_bswap32' intrinsic])
fi


#
# Check for __builtin_bswap64 intrinsic.
# The probe is a link test; the cached result defaults to no.
#
AC_CACHE_CHECK(for __builtin_bswap64,
       [gcry_cv_have_builtin_bswap64],
       [gcry_cv_have_builtin_bswap64=no
        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
          [long long x = 0; long long y = __builtin_bswap64(x); return y;])],
          [gcry_cv_have_builtin_bswap64=yes])])
if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then
   AC_DEFINE(HAVE_BUILTIN_BSWAP64,1,
             [Defined if compiler has '__builtin_bswap64' intrinsic])
fi


#
# Check for __builtin_ctz intrinsic.
#
AC_CACHE_CHECK(for __builtin_ctz,
       [gcry_cv_have_builtin_ctz],
       [gcry_cv_have_builtin_ctz=no
        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
          [unsigned int x = 0; int y = __builtin_ctz(x); return y;])],
          [gcry_cv_have_builtin_ctz=yes])])
if test "$gcry_cv_have_builtin_ctz" = "yes" ; then
   AC_DEFINE(HAVE_BUILTIN_CTZ, 1,
             [Defined if compiler has '__builtin_ctz' intrinsic])
fi


#
# Check for VLA support (variable length arrays).
#
AC_CACHE_CHECK(whether the variable length arrays are supported,
       [gcry_cv_have_vla],
       [gcry_cv_have_vla=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[void f1(char *, int);
            char foo(int i) {
              char b[(i < 0 ? 0 : i) + 1];
              f1(b, sizeof b); return b[0];}]])],
          [gcry_cv_have_vla=yes])])
if test "$gcry_cv_have_vla" = "yes" ; then
   AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported])
fi


#
# Check for ELF visibility support.
# The test file is compiled to assembly with -Werror and the output is
# searched for the .hidden and .protected directives; both must be
# present for the result to become yes.
#
AC_CACHE_CHECK(whether the visibility attribute is supported,
       gcry_cv_visibility_attribute,
       [gcry_cv_visibility_attribute=no
        AC_LANG_CONFTEST([AC_LANG_SOURCE(
          [[int foo __attribute__ ((visibility ("hidden"))) = 1;
            int bar __attribute__ ((visibility ("protected"))) = 1;
          ]])])

        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
            if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then
                if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then
                    gcry_cv_visibility_attribute=yes
                fi
            fi
        fi
       ])
# This result starts out as yes (broken) and is only cleared when the
# generated assembly marks the renamed symbol foo as hidden.
if test "$gcry_cv_visibility_attribute" = "yes"; then
    AC_CACHE_CHECK(for broken visibility attribute,
       gcry_cv_broken_visibility_attribute,
       [gcry_cv_broken_visibility_attribute=yes
        AC_LANG_CONFTEST([AC_LANG_SOURCE(
          [[int foo (int x);
            int bar (int x) __asm__ ("foo")
                            __attribute__ ((visibility ("hidden")));
            int bar (int x) { return x; }
          ]])])

        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
           if grep '\.hidden@<:@ _@:>@foo' conftest.s >/dev/null 2>&1;
            then
               gcry_cv_broken_visibility_attribute=no
           fi
        fi
       ])
fi
# Likewise starts out as yes (broken); cleared only when both asm names
# xyzzy and abccb show up in the generated assembly.
if test "$gcry_cv_visibility_attribute" = "yes"; then
    AC_CACHE_CHECK(for broken alias attribute,
       gcry_cv_broken_alias_attribute,
       [gcry_cv_broken_alias_attribute=yes
        AC_LANG_CONFTEST([AC_LANG_SOURCE(
          [[extern int foo (int x) __asm ("xyzzy");
            int bar (int x) { return x; }
            extern __typeof (bar) foo __attribute ((weak, alias ("bar")));
            extern int dfoo;
            extern __typeof (dfoo) dfoo __asm ("abccb");
            int dfoo = 1;
          ]])])

        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
           if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \
              grep 'abccb' conftest.s >/dev/null 2>&1; then
              gcry_cv_broken_alias_attribute=no
           fi
        fi
        ])
fi
# The compile test runs with the compiler flags replaced by the single
# option under test; the saved flags are put back afterwards.
if test "$gcry_cv_visibility_attribute" = "yes"; then
    AC_CACHE_CHECK(if gcc supports -fvisibility=hidden,
       gcry_cv_gcc_has_f_visibility,
       [gcry_cv_gcc_has_f_visibility=no
        _gcc_cflags_save=$CFLAGS
        CFLAGS="-fvisibility=hidden"
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],
                          gcry_cv_gcc_has_f_visibility=yes)
        CFLAGS=$_gcc_cflags_save;
       ])
fi
# Visibility is enabled only when all four results above agree.
if test "$gcry_cv_visibility_attribute" = "yes" \
   && test "$gcry_cv_broken_visibility_attribute" != "yes" \
   && test "$gcry_cv_broken_alias_attribute" != "yes" \
   && test "$gcry_cv_gcc_has_f_visibility" = "yes"
 then
   AC_DEFINE(GCRY_USE_VISIBILITY, 1,
               [Define to use the GNU C visibility attribute.])
   CFLAGS="$CFLAGS -fvisibility=hidden"
fi


# Following attribute tests depend on warnings to cause compile to fail,
# so set -Werror temporarily.
# The previous flags are kept in _gcc_cflags_save and put back after
# the three attribute checks.
_gcc_cflags_save=$CFLAGS
CFLAGS="$CFLAGS -Werror"


#
# Check whether the compiler supports the GCC style aligned attribute
#
AC_CACHE_CHECK([whether the GCC style aligned attribute is supported],
       [gcry_cv_gcc_attribute_aligned],
       [gcry_cv_gcc_attribute_aligned=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[struct { int a; } foo __attribute__ ((aligned (16)));]])],
          [gcry_cv_gcc_attribute_aligned=yes])])
if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1,
     [Defined if a GCC style "__attribute__ ((aligned (n))" is supported])
fi


#
# Check whether the compiler supports the GCC style packed attribute
# The enum in the test divides by zero, and so fails to compile, unless
# the packed struct is exactly sizeof(char) + sizeof(long) bytes.
#
AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
       [gcry_cv_gcc_attribute_packed],
       [gcry_cv_gcc_attribute_packed=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[struct foolong_s { long b; } __attribute__ ((packed));
            struct foo_s { char a; struct foolong_s b; }
              __attribute__ ((packed));
            enum bar {
              FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
            };]])],
          [gcry_cv_gcc_attribute_packed=yes])])
if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
     [Defined if a GCC style "__attribute__ ((packed))" is supported])
fi


#
# Check whether the compiler supports the GCC style may_alias attribute
#
AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported],
       [gcry_cv_gcc_attribute_may_alias],
       [gcry_cv_gcc_attribute_may_alias=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[typedef struct foo_s { int a; }
            __attribute__ ((may_alias)) foo_t;]])],
          [gcry_cv_gcc_attribute_may_alias=yes])])
if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1,
     [Defined if a GCC style "__attribute__ ((may_alias))" is supported])
fi


# Restore flags.
CFLAGS=$_gcc_cflags_save;


#
# Check whether the compiler supports 'asm' or '__asm__' keyword for
# assembler blocks.
#
AC_CACHE_CHECK([whether 'asm' assembler keyword is supported],
       [gcry_cv_have_asm],
       [gcry_cv_have_asm=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[void a(void) { asm("":::"memory"); }]])],
          [gcry_cv_have_asm=yes])])
AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported],
       [gcry_cv_have___asm__],
       [gcry_cv_have___asm__=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[void a(void) { __asm__("":::"memory"); }]])],
          [gcry_cv_have___asm__=yes])])
# When only the double-underscore spelling works, map plain asm to it.
if test "$gcry_cv_have_asm" = "no" ; then
   if test "$gcry_cv_have___asm__" = "yes" ; then
      AC_DEFINE(asm,__asm__,
        [Define to supported assembler block keyword, if plain 'asm' was not
         supported])
   fi
fi


#
# Check whether the compiler supports inline assembly memory barrier.
# The probe uses whichever keyword spelling was found to work above.
#
if test "$gcry_cv_have_asm" = "no" ; then
   if test "$gcry_cv_have___asm__" = "yes" ; then
      AC_CACHE_CHECK([whether inline assembly memory barrier is supported],
          [gcry_cv_have_asm_volatile_memory],
          [gcry_cv_have_asm_volatile_memory=no
           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
             [[void a(void) { __asm__ volatile("":::"memory"); }]])],
             [gcry_cv_have_asm_volatile_memory=yes])])
   fi
else
   AC_CACHE_CHECK([whether inline assembly memory barrier is supported],
       [gcry_cv_have_asm_volatile_memory],
       [gcry_cv_have_asm_volatile_memory=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[void a(void) { asm volatile("":::"memory"); }]])],
          [gcry_cv_have_asm_volatile_memory=yes])])
fi
if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1,
     [Define if inline asm memory barrier is supported])
fi


#
# Check whether GCC assembler supports features needed for our ARM
# implementations.  This needs to be done before setting up the
# assembler stuff.
#
AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations],
       [gcry_cv_gcc_arm_platform_as_ok],
       [gcry_cv_gcc_arm_platform_as_ok=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[__asm__(
                /* Test if assembler supports UAL syntax.  */
                ".syntax unified\n\t"
                ".arm\n\t" /* our assembly code is in ARM mode  */
                /* Following causes error if assembler ignored '.syntax unified'.  */
                "asmfunc:\n\t"
                "add %r0, %r0, %r4, ror #12;\n\t"

                /* Test if '.type' and '.size' are supported.  */
                ".size asmfunc,.-asmfunc;\n\t"
                ".type asmfunc,%function;\n\t"
            );]])],
          [gcry_cv_gcc_arm_platform_as_ok=yes])])
if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
   AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1,
     [Defined if underlying assembler is compatible with ARM assembly implementations])
fi


#
# Check whether GCC assembler supports features needed for our ARMv8/Aarch64
# implementations.  This needs to be done before setting up the
# assembler stuff.
# The probe only assembles plain integer and vector instructions; it
# does not use the .type and .size directives.
#
AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly implementations],
       [gcry_cv_gcc_aarch64_platform_as_ok],
       [gcry_cv_gcc_aarch64_platform_as_ok=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[__asm__(
                "asmfunc:\n\t"
                "eor x0, x0, x30, ror #12;\n\t"
                "add x0, x0, x30, asr #12;\n\t"
                "eor v0.16b, v0.16b, v31.16b;\n\t"
            );]])],
          [gcry_cv_gcc_aarch64_platform_as_ok=yes])])
if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then
   AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1,
     [Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly implementations])
fi


#
# Check whether underscores in symbols are required.  This needs to be
# done before setting up the assembler stuff.
#
GNUPG_SYS_SYMBOL_UNDERSCORE()


#################################
####                         ####
#### Setup assembler stuff.  ####
#### Define mpi_cpu_arch.
####
####                         ####
#################################
AC_ARG_ENABLE(mpi-path,
              AC_HELP_STRING([--enable-mpi-path=EXTRA_PATH],
              [prepend EXTRA_PATH to list of CPU specific optimizations]),
              mpi_extra_path="$enableval",mpi_extra_path="")
# mpi/config.links is sourced into this script.  The variables read
# after it (mpi_ln_list, mpi_sflags, mpi_cpu_arch and the mpi_mod_*
# flags) are not assigned anywhere else in this section.
# Configuration stops with an error when the file is missing.
AC_MSG_CHECKING(architecture and mpi assembler functions)
if test -f $srcdir/mpi/config.links ; then
    . $srcdir/mpi/config.links
    AC_CONFIG_LINKS("$mpi_ln_list")
    ac_cv_mpi_sflags="$mpi_sflags"
    AC_MSG_RESULT($mpi_cpu_arch)
else
    AC_MSG_RESULT(failed)
    AC_MSG_ERROR([mpi/config.links missing!])
fi
MPI_SFLAGS="$ac_cv_mpi_sflags"
AC_SUBST(MPI_SFLAGS)

# One automake conditional per mpi module flag: first the assembler
# variants, then the C variants.
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes)
AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes)
AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes)
AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes)
AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes)

# Reset non applicable feature flags.
# Features that only exist on x86 are reported as n/a everywhere else.
case "$mpi_cpu_arch" in
  x86)
    ;;
  *)
    aesnisupport="n/a"
    shaextsupport="n/a"
    pclmulsupport="n/a"
    sse41support="n/a"
    avxsupport="n/a"
    avx2support="n/a"
    padlocksupport="n/a"
    jentsupport="n/a"
    drngsupport="n/a"
    ;;
esac

# NEON and the ARM crypto extension apply to both arm and aarch64.
case "$mpi_cpu_arch" in
  arm | aarch64)
    ;;
  *)
    neonsupport="n/a"
    armcryptosupport="n/a"
    ;;
esac


#############################################
####                                     ####
#### Platform specific compiler checks.  ####
####                                     ####
#############################################


# Following tests depend on warnings to cause compile to fail, so set -Werror
# temporarily.
_gcc_cflags_save=$CFLAGS
CFLAGS="$CFLAGS -Werror"


#
# Check whether compiler supports 'ms_abi' function attribute.
#
AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
       [gcry_cv_gcc_attribute_ms_abi],
       [gcry_cv_gcc_attribute_ms_abi=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[int __attribute__ ((ms_abi)) proto(int);]])],
          [gcry_cv_gcc_attribute_ms_abi=yes])])
if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
     [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute])
fi


#
# Check whether compiler supports 'sysv_abi' function attribute.
#
AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
       [gcry_cv_gcc_attribute_sysv_abi],
       [gcry_cv_gcc_attribute_sysv_abi=no
        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[int __attribute__ ((sysv_abi)) proto(int);]])],
          [gcry_cv_gcc_attribute_sysv_abi=yes])])
if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
   AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
     [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute])
fi


#
# Check whether default calling convention is 'ms_abi'.
#
# The probe assigns a default-convention function pointer to an ms_abi
# one.  The flags still carry -Werror at this point, so a mismatch
# warning makes the compile fail and the result stays no.
if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
   AC_CACHE_CHECK([whether default calling convention is 'ms_abi'],
          [gcry_cv_gcc_default_abi_is_ms_abi],
          [gcry_cv_gcc_default_abi_is_ms_abi=no
           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
             [[void *test(void) {
                 void *(*def_func)(void) = test;
                 void *__attribute__((ms_abi))(*msabi_func)(void);
                 /* warning on SysV abi targets, passes on Windows based targets */
                 msabi_func = def_func;
                 return msabi_func;
             }]])],
             [gcry_cv_gcc_default_abi_is_ms_abi=yes])])
   if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1,
        [Defined if default calling convention is 'ms_abi'])
   fi
fi


#
# Check whether default calling convention is 'sysv_abi'.
# Same pointer assignment technique as the ms_abi check.
#
if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
   AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'],
          [gcry_cv_gcc_default_abi_is_sysv_abi],
          [gcry_cv_gcc_default_abi_is_sysv_abi=no
           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
             [[void *test(void) {
                 void *(*def_func)(void) = test;
                 void *__attribute__((sysv_abi))(*sysvabi_func)(void);
                 /* warning on MS ABI targets, passes on SysV ABI targets */
                 sysvabi_func = def_func;
                 return sysvabi_func;
             }]])],
             [gcry_cv_gcc_default_abi_is_sysv_abi=yes])])
   if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1,
        [Defined if default calling convention is 'sysv_abi'])
   fi
fi


# Restore flags.
CFLAGS=$_gcc_cflags_save;


#
# Check whether GCC inline assembler supports SSSE3 instructions
# This is required for the AES-NI instructions.
#
# On anything other than x86 the probe is skipped and the result is
# recorded as n/a.
AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions],
       [gcry_cv_gcc_inline_asm_ssse3],
       [if test "$mpi_cpu_arch" != "x86" ; then
          gcry_cv_gcc_inline_asm_ssse3="n/a"
        else
          gcry_cv_gcc_inline_asm_ssse3=no
          AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
              { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
            void a(void) {
              __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):);
            }]])],
          [gcry_cv_gcc_inline_asm_ssse3=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1,
     [Defined if inline assembler supports SSSE3 instructions])
fi


#
# Check whether GCC inline assembler supports PCLMUL instructions.
# Skipped and recorded as n/a on anything other than x86.
#
AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions],
       [gcry_cv_gcc_inline_asm_pclmul],
       [if test "$mpi_cpu_arch" != "x86" ; then
          gcry_cv_gcc_inline_asm_pclmul="n/a"
        else
          gcry_cv_gcc_inline_asm_pclmul=no
          AC_COMPILE_IFELSE([AC_LANG_SOURCE(
          [[void a(void) {
              __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc");
            }]])],
          [gcry_cv_gcc_inline_asm_pclmul=yes])
        fi])
if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then
   AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1,
     [Defined if inline assembler supports PCLMUL instructions])
fi


#
# Check whether GCC inline assembler supports SHA Extensions instructions.
# AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions], [gcry_cv_gcc_inline_asm_shaext], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_shaext="n/a" else gcry_cv_gcc_inline_asm_shaext=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc"); __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_shaext=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1, [Defined if inline assembler supports SHA Extensions instructions]) fi # # Check whether GCC inline assembler supports SSE4.1 instructions. # AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions], [gcry_cv_gcc_inline_asm_sse41], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_sse41="n/a" else gcry_cv_gcc_inline_asm_sse41=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { int i; __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i)); }]])], [gcry_cv_gcc_inline_asm_sse41=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1, [Defined if inline assembler supports SSE4.1 instructions]) fi # # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], [gcry_cv_gcc_inline_asm_avx], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx="n/a" else gcry_cv_gcc_inline_asm_avx=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):); }]])], [gcry_cv_gcc_inline_asm_avx=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then 
AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1, [Defined if inline assembler supports AVX instructions]) fi # # Check whether GCC inline assembler supports AVX2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions], [gcry_cv_gcc_inline_asm_avx2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_avx2="n/a" else gcry_cv_gcc_inline_asm_avx2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[void a(void) { __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc"); }]])], [gcry_cv_gcc_inline_asm_avx2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1, [Defined if inline assembler supports AVX2 instructions]) fi # # Check whether GCC inline assembler supports BMI2 instructions # AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions], [gcry_cv_gcc_inline_asm_bmi2], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_inline_asm_bmi2="n/a" else gcry_cv_gcc_inline_asm_bmi2=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[unsigned int a(unsigned int x, unsigned int y) { unsigned int tmp1, tmp2; asm ("rorxl %2, %1, %0" : "=r" (tmp1) : "rm0" (x), "J" (32 - ((23) & 31))); asm ("andnl %2, %1, %0" : "=r" (tmp2) : "r0" (x), "rm" (y)); return tmp1 + tmp2; }]])], [gcry_cv_gcc_inline_asm_bmi2=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1, [Defined if inline assembler supports BMI2 instructions]) fi # # Check whether GCC assembler needs "-Wa,--divide" to correctly handle # constant division # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler handles division correctly], [gcry_cv_gcc_as_const_division_ok], [gcry_cv_gcc_as_const_division_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then # # Add '-Wa,--divide' to CPPFLAGS and try check again. 
# _gcc_cppflags_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -Wa,--divide" AC_CACHE_CHECK([whether GCC assembler handles division correctly with "-Wa,--divide"], [gcry_cv_gcc_as_const_division_with_wadivide_ok], [gcry_cv_gcc_as_const_division_with_wadivide_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__("xorl \$(123456789/12345678), %ebp;\n\t");]])], [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])]) if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then # '-Wa,--divide' did not work, restore old flags. CPPFLAGS="$_gcc_cppflags_save" fi fi fi # # Check whether GCC assembler supports features needed for our amd64 # implementations # if test $amd64_as_feature_detection = yes; then AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly implementations], [gcry_cv_gcc_amd64_platform_as_ok], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_amd64_platform_as_ok="n/a" else gcry_cv_gcc_amd64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" /* Test if assembler allows use of '/' for constant division * (Solaris/x86 issue). If previous constant division check * and "-Wa,--divide" workaround failed, this causes assembly * to be disable on this machine. 
*/ "xorl \$(123456789/12345678), %ebp;\n\t" );]])], [gcry_cv_gcc_amd64_platform_as_ok=yes]) fi]) if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], [gcry_cv_gcc_win64_platform_as_ok], [gcry_cv_gcc_win64_platform_as_ok=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".globl asmfunc\n\t" "asmfunc:\n\t" "xorq \$(1234), %rbp;\n\t" );]])], [gcry_cv_gcc_win64_platform_as_ok=yes])]) if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with WIN64 assembly implementations]) fi fi fi # # Check whether GCC assembler supports features needed for assembly # implementations that use Intel syntax # AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly implementations], [gcry_cv_gcc_platform_as_ok_for_intel_syntax], [if test "$mpi_cpu_arch" != "x86" ; then gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a" else gcry_cv_gcc_platform_as_ok_for_intel_syntax=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".intel_syntax noprefix\n\t" "pxor xmm1, xmm7;\n\t" /* Intel syntax implementation also use GAS macros, so check * for them here. 
*/ "VAL_A = xmm4\n\t" "VAL_B = xmm2\n\t" ".macro SET_VAL_A p1\n\t" " VAL_A = \\\\p1 \n\t" ".endm\n\t" ".macro SET_VAL_B p1\n\t" " VAL_B = \\\\p1 \n\t" ".endm\n\t" "vmovdqa VAL_A, VAL_B;\n\t" "SET_VAL_A eax\n\t" "SET_VAL_B ebp\n\t" "add VAL_A, VAL_B;\n\t" "add VAL_B, 0b10101;\n\t" );]])], [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes]) fi]) if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1, [Defined if underlying assembler is compatible with Intel syntax assembly implementations]) fi # # Check whether compiler is configured for ARMv6 or newer architecture # AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture], [gcry_cv_cc_arm_arch_is_v6], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_cc_arm_arch_is_v6="n/a" else gcry_cv_cc_arm_arch_is_v6=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[ #if defined(__arm__) && \ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7EM__)) /* empty */ #else /* fail compile if not ARMv6. 
*/ not_armv6 not_armv6 = (not_armv6)not_armv6; #endif ]])], [gcry_cv_cc_arm_arch_is_v6=yes]) fi]) if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then AC_DEFINE(HAVE_ARM_ARCH_V6,1, [Defined if ARM architecture is v6 or newer]) fi # # Check whether GCC inline assembler supports NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions], [gcry_cv_gcc_inline_asm_neon], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_neon="n/a" else gcry_cv_gcc_inline_asm_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arm\n\t" ".fpu neon\n\t" "vld1.64 {%q0-%q1}, [%r0]!;\n\t" "vrev64.8 %q0, %q3;\n\t" "vadd.u64 %q0, %q1;\n\t" "vadd.s64 %d3, %d2, %d3;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1, [Defined if inline assembler supports NEON instructions]) fi # # Check whether GCC inline assembler supports AArch32 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch32_crypto], [if test "$mpi_cpu_arch" != "arm" ; then gcry_cv_gcc_inline_asm_aarch32_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch32_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".syntax unified\n\t" ".arch armv8-a\n\t" ".arm\n\t" ".fpu crypto-neon-fp-armv8\n\t" "sha1h.32 q0, q0;\n\t" "sha1c.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha1su0.32 q0, q0, q0;\n\t" "sha1su1.32 q0, q0;\n\t" "sha256h.32 q0, q0, q0;\n\t" "sha256h2.32 q0, q0, q0;\n\t" "sha1p.32 q0, q0, q0;\n\t" "sha256su0.32 q0, q0;\n\t" "sha256su1.32 q0, q0, q15;\n\t" "aese.8 q0, q0;\n\t" "aesd.8 q0, q0;\n\t" "aesmc.8 q0, q0;\n\t" "aesimc.8 q0, q0;\n\t" "vmull.p64 q0, d0, d0;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch32_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1, [Defined if inline assembler 
supports AArch32 Crypto Extension instructions]) fi # # Check whether GCC inline assembler supports AArch64 NEON instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON instructions], [gcry_cv_gcc_inline_asm_aarch64_neon], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_neon="n/a" else gcry_cv_gcc_inline_asm_aarch64_neon=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_neon=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1, [Defined if inline assembler supports AArch64 NEON instructions]) fi # # Check whether GCC inline assembler supports AArch64 Crypto Extension instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension instructions], [gcry_cv_gcc_inline_asm_aarch64_crypto], [if test "$mpi_cpu_arch" != "aarch64" ; then gcry_cv_gcc_inline_asm_aarch64_crypto="n/a" else gcry_cv_gcc_inline_asm_aarch64_crypto=no AC_COMPILE_IFELSE([AC_LANG_SOURCE( [[__asm__( ".cpu generic+simd+crypto\n\t" "mov w0, \#42;\n\t" "dup v0.8b, w0;\n\t" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t" "sha1h s0, s0;\n\t" "sha1c q0, s0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha1su0 v0.4s, v0.4s, v0.4s;\n\t" "sha1su1 v0.4s, v0.4s;\n\t" "sha256h q0, q0, v0.4s;\n\t" "sha256h2 q0, q0, v0.4s;\n\t" "sha1p q0, s0, v0.4s;\n\t" "sha256su0 v0.4s, v0.4s;\n\t" "sha256su1 v0.4s, v0.4s, v31.4s;\n\t" "aese v0.16b, v0.16b;\n\t" "aesd v0.16b, v0.16b;\n\t" "aesmc v0.16b, v0.16b;\n\t" "aesimc v0.16b, v0.16b;\n\t" "pmull v0.1q, v0.1d, v31.1d;\n\t" "pmull2 v0.1q, v0.2d, v31.2d;\n\t" ); ]])], [gcry_cv_gcc_inline_asm_aarch64_crypto=yes]) fi]) if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1, [Defined if inline assembler supports AArch64 Crypto Extension 
instructions])
fi


#######################################
#### Checks for library functions. ####
#######################################

AC_FUNC_VPRINTF
# We have replacements for these in src/missing-string.c
AC_CHECK_FUNCS(stpcpy strcasecmp)
# We have replacements for these in src/g10lib.h
AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise)
# Other checks
AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4)
AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog)
AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile)

GNUPG_CHECK_MLOCK

#
# Replacement functions.
#
AC_REPLACE_FUNCS([getpid clock])


#
# Check whether it is necessary to link against libdl.
#
# LIBS is emptied for the duration of the search so that whatever the
# search adds ends up alone in DL_LIBS; the saved LIBS value is restored
# afterwards.  The search only runs when use_hmac_binary_check is yes.
#
DL_LIBS=""
if test "$use_hmac_binary_check" = yes ; then
  _gcry_save_libs="$LIBS"
  LIBS=""
  AC_SEARCH_LIBS(dlopen, c dl,,,)
  DL_LIBS=$LIBS
  LIBS="$_gcry_save_libs"
  LIBGCRYPT_CONFIG_LIBS="${LIBGCRYPT_CONFIG_LIBS} ${DL_LIBS}"
fi
AC_SUBST(DL_LIBS)


#
# Check whether we can use Linux capabilities as requested.
#
# use_capabilities is reset to no first and only set back to yes once both
# sys/capability.h and cap_init in libcap have been found; otherwise the
# warning below is printed.
#
if test "$use_capabilities" = "yes" ; then
use_capabilities=no
AC_CHECK_HEADERS(sys/capability.h)
if test "$ac_cv_header_sys_capability_h" = "yes" ; then
  AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1)
  if test "$ac_cv_lib_cap_cap_init" = "yes"; then
     AC_DEFINE(USE_CAPABILITIES,1,
               [define if capabilities should be used])
     LIBS="$LIBS -lcap"
     use_capabilities=yes
  fi
fi
if test "$use_capabilities" = "no" ; then
    AC_MSG_WARN([[ *** *** The use of capabilities on this system is not possible. *** You need a recent Linux kernel and some patches: *** fcaps-2.2.9-990610.patch (kernel patch for 2.2.9) *** fcap-module-990613.tar.gz (kernel module) *** libcap-1.92.tar.gz (user mode library and utilities) *** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN *** set (filesystems menu). Be warned: This code is *really* ALPHA. ***]])
fi
fi

# Check whether a random device is available.
if test "$try_dev_random" = yes ; then
    AC_CACHE_CHECK(for random device, ac_cv_have_dev_random,
    [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then
      ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi])
    if test "$ac_cv_have_dev_random" = yes; then
        AC_DEFINE(HAVE_DEV_RANDOM,1,
                 [defined if the system supports a random device] )
    fi
else
    AC_MSG_CHECKING(for random device)
    ac_cv_have_dev_random=no
    AC_MSG_RESULT(has been disabled)
fi

# Figure out the random modules for this configuration.
# The value default picks a module from the device check result and the
# host triplet, auto selects every module in auto_random_modules, and any
# other value is used as the module list as given.
if test "$random" = "default"; then

    # Select default value.
    if test "$ac_cv_have_dev_random" = yes; then
        # Try Linuxish random device.
        random_modules="linux"
    else
        case "${host}" in
        *-*-mingw32ce*)
          # WindowsCE random device.
          random_modules="w32ce"
          ;;
        *-*-mingw32*|*-*-cygwin*)
          # Windows random device.
          random_modules="w32"
          ;;
        *)
          # Build everything and allow selection at runtime.
          random_modules="$auto_random_modules"
          ;;
        esac
    fi
else
    if test "$random" = "auto"; then
        # Build everything and allow selection at runtime.
        random_modules="$auto_random_modules"
    else
        random_modules="$random"
    fi
fi


#
# Other defines
#
if test mym4_isgit = "yes"; then
    AC_DEFINE(IS_DEVELOPMENT_VERSION,1,
              [Defined if this is not a regular release])
fi


AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes)


# This is handy for debugging so the compiler doesn't rearrange
# things and eliminate variables.
# Only the first occurrence of -O followed by a single digit is removed
# from CFLAGS, and only when the option value is exactly no.
AC_ARG_ENABLE(optimization,
       AC_HELP_STRING([--disable-optimization],
                      [disable compiler optimization]),
                      [if test $enableval = no ; then
                         CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'`
                       fi])

# CFLAGS mangling when using gcc.
if test "$GCC" = yes; then
    CFLAGS="$CFLAGS -Wall"
    if test "$USE_MAINTAINER_MODE" = "yes"; then
        CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes"
        CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security"

        # If -Wno-missing-field-initializers is supported we can enable
        # a bunch of really useful warnings.
AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers])
        # Each probe compiles an empty program with CFLAGS reduced to the
        # candidate flag and then restores the saved CFLAGS value.
        _gcc_cflags_save=$CFLAGS
        CFLAGS="-Wno-missing-field-initializers"
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no)
        AC_MSG_RESULT($_gcc_wopt)
        CFLAGS=$_gcc_cflags_save;
        if test x"$_gcc_wopt" = xyes ; then
          CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast"
          CFLAGS="$CFLAGS -Wwrite-strings"
          CFLAGS="$CFLAGS -Wdeclaration-after-statement"
          CFLAGS="$CFLAGS -Wno-missing-field-initializers"
          CFLAGS="$CFLAGS -Wno-sign-compare"
        fi

        AC_MSG_CHECKING([if gcc supports -Wpointer-arith])
        _gcc_cflags_save=$CFLAGS
        CFLAGS="-Wpointer-arith"
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no)
        AC_MSG_RESULT($_gcc_wopt)
        CFLAGS=$_gcc_cflags_save;
        if test x"$_gcc_wopt" = xyes ; then
          CFLAGS="$CFLAGS -Wpointer-arith"
        fi
    fi
fi

# Check whether as(1) supports a noexecstack feature.  This test
# includes an override option.
CL_AS_NOEXECSTACK


AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION)
AC_SUBST(LIBGCRYPT_CONFIG_LIBS)
AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS)
AC_SUBST(LIBGCRYPT_CONFIG_HOST)
AC_SUBST(LIBGCRYPT_THREAD_MODULES)

# Make the generated src/libgcrypt-config script executable.
AC_CONFIG_COMMANDS([gcrypt-conf],[[
chmod +x src/libgcrypt-config
]],[[
prefix=$prefix
exec_prefix=$exec_prefix
libdir=$libdir
datadir=$datadir
DATADIRNAME=$DATADIRNAME
]])

#####################
#### Conclusion. ####
#####################

# Check that requested feature can actually be used and define
# ENABLE_foo_SUPPORT macros.
# Downgrade every requested CPU feature whose inline-assembler probe failed.
# A feature variable that is no longer exactly "yes" suppresses the matching
# ENABLE_*_SUPPORT definition further down, and its text is what the final
# configuration summary prints for that feature.

if test x"$aesnisupport" = xyes ; then
  # AES-NI is gated on the SSSE3 inline-assembler probe.
  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
    aesnisupport="no (unsupported by compiler)"
  fi
fi
if test x"$shaextsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
    shaextsupport="no (unsupported by compiler)"
  fi
fi
if test x"$pclmulsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
    pclmulsupport="no (unsupported by compiler)"
  fi
fi
if test x"$sse41support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
    sse41support="no (unsupported by compiler)"
  fi
fi
if test x"$avxsupport" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
    avxsupport="no (unsupported by compiler)"
  fi
fi
if test x"$avx2support" = xyes ; then
  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
    avx2support="no (unsupported by compiler)"
  fi
fi
if test x"$neonsupport" = xyes ; then
  # NEON stays enabled when either the 32-bit ARM probe or the AArch64
  # probe succeeded.
  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
      neonsupport="no (unsupported by compiler)"
    fi
  fi
fi
if test x"$armcryptosupport" = xyes ; then
  # The ARMv8 Crypto Extension stays enabled when either the AArch32 or
  # the AArch64 probe succeeded.  The downgrade has to be stored in
  # armcryptosupport itself: storing it in neonsupport would switch NEON
  # off and leave the crypto extension enabled although no probe passed.
  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
      armcryptosupport="no (unsupported by compiler)"
    fi
  fi
fi

# Emit one config.h symbol per feature that is still enabled.
if test x"$aesnisupport" = xyes ; then
  AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
            [Enable support for Intel AES-NI instructions.])
fi
if test x"$shaextsupport" = xyes ; then
  AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1,
            [Enable support for Intel SHAEXT instructions.])
fi
if test x"$pclmulsupport" = xyes ; then
  AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
            [Enable support for Intel PCLMUL instructions.])
fi
if test x"$sse41support" = xyes ; then
  AC_DEFINE(ENABLE_SSE41_SUPPORT, 1,
            [Enable support for Intel SSE4.1 instructions.])
fi
if test x"$avxsupport" = xyes ; then
  AC_DEFINE(ENABLE_AVX_SUPPORT,1,
            [Enable support for Intel AVX instructions.])
fi
if test x"$avx2support" = xyes ; then
  AC_DEFINE(ENABLE_AVX2_SUPPORT,1,
[Enable support for Intel AVX2 instructions.])
fi
if test x"$neonsupport" = xyes ; then
  AC_DEFINE(ENABLE_NEON_SUPPORT,1,
            [Enable support for ARM NEON instructions.])
fi
if test x"$armcryptosupport" = xyes ; then
  AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
            [Enable support for ARMv8 Crypto Extension instructions.])
fi
if test x"$jentsupport" = xyes ; then
  AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
            [Enable support for the jitter entropy collector.])
fi
if test x"$padlocksupport" = xyes ; then
  AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1,
            [Enable support for the PadLock engine.])
fi
if test x"$drngsupport" = xyes ; then
  AC_DEFINE(ENABLE_DRNG_SUPPORT, 1,
            [Enable support for Intel DRNG (RDRAND instruction).])
fi


# Define conditional sources and config.h symbols depending on the
# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
#
# Every stanza below has the same shape: look the algorithm name up in the
# relevant enabled list, and if it is present append the generic object
# file, define the matching USE_ symbol and append any host-specific
# objects.
# NOTE(review): the lookup helper is defined outside this part of the
# file; the stanzas rely on it leaving found set to 1 on a match.

LIST_MEMBER(arcfour, $enabled_ciphers)
if test "$found" = "1"; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
   AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blowfish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
   AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(cast5, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
   AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-arm.lo"
      ;;
   esac
fi

LIST_MEMBER(des,
$enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
   AC_DEFINE(USE_DES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS des-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(aes, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo"
   AC_DEFINE(USE_AES, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-amd64.lo"

         # Build with the SSSE3 implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64.lo"
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ssse3-amd64-asm.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-arm.lo"

         # Build with the ARMv8/AArch32 CE implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch32-ce.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aarch64.lo"

         # Build with the ARMv8/AArch64 CE implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo"
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo"
      ;;
   esac

   # These objects are selected by mpi_cpu_arch instead of the host
   # triplet used above.
   case "$mpi_cpu_arch" in
     x86)
         # Build with the AES-NI implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-aesni.lo"

         # Build with the Padlock implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-padlock.lo"
      ;;
   esac
fi

LIST_MEMBER(twofish, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo"
   AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-amd64.lo"

         if test x"$avx2support" = xyes ; then
            # Build with the AVX2 implementation
            GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-avx2-amd64.lo"
         fi
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-arm.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-aarch64.lo"
      ;;
   esac
fi

LIST_MEMBER(serpent, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo"
   AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the SSE2 implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-sse2-amd64.lo"
      ;;
   esac

   # The AVX2 and NEON objects depend only on the feature variables, not
   # on the host triplet.
   if test x"$avx2support" = xyes ; then
      # Build with the AVX2 implementation
      GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-avx2-amd64.lo"
   fi

   if test x"$neonsupport" = xyes ; then
      # Build with the NEON implementation
      GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent-armv7-neon.lo"
   fi
fi

LIST_MEMBER(rfc2268, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo"
   AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included])
fi

LIST_MEMBER(seed, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo"
   AC_DEFINE(USE_SEED, 1, [Defined if this module should be included])
fi

LIST_MEMBER(camellia, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo"
   AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included])

   case "${host}" in
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-arm.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aarch64.lo"
      ;;
   esac

   # The vectorised variants need AES-NI in addition to AVX or AVX2.
   if test x"$avxsupport" = xyes ; then
      if test x"$aesnisupport" = xyes ; then
        # Build with the AES-NI/AVX implementation
        GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx-amd64.lo"
      fi
   fi

   if test x"$avx2support" = xyes ; then
      if test x"$aesnisupport" = xyes ; then
        # Build with the AES-NI/AVX2 implementation
        GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-aesni-avx2-amd64.lo"
      fi
   fi
fi

LIST_MEMBER(idea, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo"
   AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(salsa20, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo"
   AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-amd64.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20-armv7-neon.lo"
   fi
fi

LIST_MEMBER(gost28147, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo"
   AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included])
fi

LIST_MEMBER(chacha20, $enabled_ciphers)
if test "$found" = "1" ; then
   GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo"
   AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-amd64-ssse3.lo"
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-amd64-avx2.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-aarch64.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20-armv7-neon.lo"
   fi
fi

# Public key algorithms are looked up in enabled_pubkey_ciphers and
# collected in GCRYPT_PUBKEY_CIPHERS.
LIST_MEMBER(dsa, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo"
   AC_DEFINE(USE_DSA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(rsa, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo"
   AC_DEFINE(USE_RSA, 1, [Defined if this module should be included])
fi

LIST_MEMBER(elgamal, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo"
   AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included])
fi

LIST_MEMBER(ecc, $enabled_pubkey_ciphers)
if test "$found" = "1" ; then
   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \
                          ecc.lo ecc-curves.lo ecc-misc.lo \
                          ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo"
   AC_DEFINE(USE_ECC, 1, [Defined if this module should be included])
fi

# Digest algorithms are looked up in enabled_digests and collected in
# GCRYPT_DIGESTS.
LIST_MEMBER(crc, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo"
   AC_DEFINE(USE_CRC, 1, [Defined if this module should be included])

   case "${host}" in
      i?86-*-* | x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
      ;;
   esac
fi

LIST_MEMBER(gostr3411-94, $enabled_digests)
if test "$found" = "1" ; then
   # GOST R 34.11-94 internally uses GOST 28147-89
   # The digest is therefore only built when the gost28147 cipher is
   # enabled as well.
   LIST_MEMBER(gost28147, $enabled_ciphers)
   if test "$found" = "1" ; then
      GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo"
      AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be included])
   fi
fi

LIST_MEMBER(stribog, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo"
   AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md2, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo"
   AC_DEFINE(USE_MD2, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md4, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo"
   AC_DEFINE(USE_MD4, 1, [Defined if this module should be included])
fi

LIST_MEMBER(md5, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo"
   AC_DEFINE(USE_MD5, 1, [Defined if this module should be included])
fi

LIST_MEMBER(rmd160, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo"
   AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included])
fi

LIST_MEMBER(sha256, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo"
   AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ssse3-amd64.lo"
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx-amd64.lo"
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-avx2-bmi2-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch32-ce.lo"
      ;;
      aarch64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-armv8-aarch64-ce.lo"
      ;;
   esac

   case "$mpi_cpu_arch" in
     x86)
       # Build with the SHAEXT implementation
       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-intel-shaext.lo"
     ;;
   esac
fi

LIST_MEMBER(sha512, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo"
   AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ssse3-amd64.lo"
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx-amd64.lo"
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-avx2-bmi2-amd64.lo"
      ;;
      arm*-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-arm.lo"
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-armv7-neon.lo"
   fi
fi

LIST_MEMBER(sha3, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo"
   AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included])

   # The x86_64 branch is a placeholder: it adds no object file.
   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         :
      ;;
   esac

   if test x"$neonsupport" = xyes ; then
     # Build with the NEON implementation
     GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak-armv7-neon.lo"
   fi
fi

LIST_MEMBER(tiger, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo"
   AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included])
fi

LIST_MEMBER(whirlpool, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo"
   AC_DEFINE(USE_WHIRLPOOL, 1, [Defined
if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool-sse2-amd64.lo"
      ;;
   esac
fi

LIST_MEMBER(blake2, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo"
   AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included])

   case "${host}" in
      x86_64-*-*)
         # Build with the assembly implementation
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2b-amd64-avx2.lo"
         GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2s-amd64-avx.lo"
      ;;
   esac
fi

# SHA-1 needs to be included always for example because it is used by
# random-csprng.c.
# Unlike the stanzas around it, this one is not guarded by a lookup in
# enabled_digests.
GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo"
AC_DEFINE(USE_SHA1, 1,   [Defined if this module should be included])

case "${host}" in
  x86_64-*-*)
    # Build with the assembly implementation
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-ssse3-amd64.lo"
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-amd64.lo"
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-avx-bmi2-amd64.lo"
  ;;
  arm*-*-*)
    # Build with the assembly implementation
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv7-neon.lo"
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch32-ce.lo"
  ;;
  aarch64-*-*)
    # Build with the assembly implementation
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-armv8-aarch64-ce.lo"
  ;;
esac

case "$mpi_cpu_arch" in
  x86)
    # Build with the SHAEXT implementation
    GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-intel-shaext.lo"
  ;;
esac

LIST_MEMBER(sm3, $enabled_digests)
if test "$found" = "1" ; then
   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"
   AC_DEFINE(USE_SM3, 1, [Defined if this module should be included])
fi

# Key derivation functions are looked up in enabled_kdfs.
LIST_MEMBER(scrypt, $enabled_kdfs)
if test "$found" = "1" ; then
   GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"
   AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included])
fi

# Random number modules are looked up in random_modules.
LIST_MEMBER(linux, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndlinux.lo"
   AC_DEFINE(USE_RNDLINUX, 1, [Defined if the /dev/random RNG should be used.])
fi

LIST_MEMBER(unix, $random_modules)
if test "$found" = "1" ; then
GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo"
   AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.])
fi

LIST_MEMBER(egd, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo"
   AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.])
fi

LIST_MEMBER(w32, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo"
   AC_DEFINE(USE_RNDW32, 1,
             [Defined if the Windows specific RNG should be used.])
fi

LIST_MEMBER(w32ce, $random_modules)
if test "$found" = "1" ; then
   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo"
   AC_DEFINE(USE_RNDW32CE, 1,
             [Defined if the WindowsCE specific RNG should be used.])
fi

# Export the collected object lists and the enabled algorithm names.
AC_SUBST([GCRYPT_CIPHERS])
AC_SUBST([GCRYPT_PUBKEY_CIPHERS])
AC_SUBST([GCRYPT_DIGESTS])
AC_SUBST([GCRYPT_KDFS])
AC_SUBST([GCRYPT_RANDOM])

AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers)
AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers)
AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests)

# For printing the configuration we need a colon separated list of
# algorithm names.
# Each list is produced by replacing the spaces of the enabled list with
# colons; tmp is reused for all four lists.
tmp=`echo "$enabled_ciphers" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp",
                   [List of available cipher algorithms])
tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp",
                   [List of available public key cipher algorithms])
tmp=`echo "$enabled_digests" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp",
                   [List of available digest algorithms])
tmp=`echo "$enabled_kdfs" | tr ' ' : `
AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp",
                   [List of available KDF algorithms])


#
# Define conditional sources depending on the used hardware platform.
# Note that all possible modules must also be listed in
# src/Makefile.am (EXTRA_libgcrypt_la_SOURCES).
#
# Only the x86, arm and aarch64 branches select a hardware feature
# detection module; every other architecture leaves the list empty.
# aarch64 shares both the config.h symbol and the module with arm.
GCRYPT_HWF_MODULES=
case "$mpi_cpu_arch" in
     x86)
        AC_DEFINE(HAVE_CPU_ARCH_X86, 1, [Defined for the x86 platforms])
        GCRYPT_HWF_MODULES="hwf-x86.lo"
        ;;
     alpha)
        AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms])
        ;;
     sparc)
        AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms])
        ;;
     mips)
        AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1, [Defined for MIPS platforms])
        ;;
     m68k)
        AC_DEFINE(HAVE_CPU_ARCH_M68K, 1, [Defined for M68k platforms])
        ;;
     ppc)
        AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms])
        ;;
     arm)
        AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms])
        GCRYPT_HWF_MODULES="hwf-arm.lo"
        ;;
     aarch64)
        AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM AArch64 platforms])
        GCRYPT_HWF_MODULES="hwf-arm.lo"
        ;;
esac
AC_SUBST([GCRYPT_HWF_MODULES])


#
# Option to disable building of doc file
#
build_doc=yes
AC_ARG_ENABLE([doc], AC_HELP_STRING([--disable-doc],
                                    [do not build the documentation]),
                     build_doc=$enableval, build_doc=yes)
AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno])


#
# Provide information about the build.
#
BUILD_REVISION="mym4_revision"
AC_SUBST(BUILD_REVISION)
AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION",
                   [GIT commit id revision used to build this package])

# The file version is the leading digits-and-dots part of VERSION with a
# dot appended and every dot turned into a comma, followed by the decimal
# revision.  Quoting is switched off around the sed call so that its
# bracket expression is passed through unchanged.
changequote(,)dnl
BUILD_FILEVERSION=`echo "$VERSION" | sed 's/\([0-9.]*\).*/\1./;s/\./,/g'`
changequote([,])dnl
BUILD_FILEVERSION="${BUILD_FILEVERSION}mym4_revision_dec"
AC_SUBST(BUILD_FILEVERSION)

# With the value yes the current UTC time is used, falling back to plain
# date output if the formatted call fails; any other option value is
# stored verbatim as the timestamp.
AC_ARG_ENABLE([build-timestamp],
  AC_HELP_STRING([--enable-build-timestamp],
                 [set an explicit build timestamp for reproducibility. (default is the current time in ISO-8601 format)]),
     [if test "$enableval" = "yes"; then
        BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
      else
        BUILD_TIMESTAMP="$enableval"
      fi],
     [BUILD_TIMESTAMP=""])
AC_SUBST(BUILD_TIMESTAMP)
AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP",
                   [The time this package was configured for a build])


# And create the files.
AC_CONFIG_FILES([
Makefile
m4/Makefile
compat/Makefile
mpi/Makefile
cipher/Makefile
random/Makefile
doc/Makefile
src/Makefile
src/gcrypt.h
src/libgcrypt-config
src/versioninfo.rc
tests/Makefile
])
# The two test helper scripts are made executable after they are generated.
AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g])
AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf])
AC_OUTPUT


# Name of the hardware feature module with its .lo suffix stripped, or
# none when no module was selected.
detection_module="${GCRYPT_HWF_MODULES%.lo}"
test -n "$detection_module" || detection_module="none"

# Give some feedback
# The feature lines print the feature variables as they stand after the
# compiler support checks, so a downgraded feature shows its reason text.
GCRY_MSG_SHOW([],[])
GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:])
GCRY_MSG_SHOW([],[])
GCRY_MSG_SHOW([Platform: ],[$PRINTABLE_OS_NAME ($host)])
GCRY_MSG_SHOW([Hardware detection module:],[$detection_module])
GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers])
GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests])
GCRY_MSG_WRAP([Enabled kdf algorithms: ],[$enabled_kdfs])
GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers])
GCRY_MSG_SHOW([Random number generator: ],[$random])
GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport])
GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities])
GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport])
GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport])
GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport])
GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport])
GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support])
GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport])
GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport])
GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support])
GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport])
GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport])
GCRY_MSG_SHOW([],[])

if test "x${gpg_config_script_warn}" != x; then
cat <