/* ARM64 add_n -- Add two limb vectors of the same length > 0 and store
 * sum in a third limb vector.
 *
 * Copyright (C) 2013 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "sysdep.h"
#include "asm-syntax.h"
#include "asm-common-aarch64.h"

/*******************
 * mpi_limb_t
 * _gcry_mpih_add_n( mpi_ptr_t res_ptr,		x0
 *		   mpi_ptr_t s1_ptr,		x1
 *		   mpi_ptr_t s2_ptr,		x2
 *		   mpi_size_t size)		x3
 *
 * Returns the final carry (0 or 1) in x0.  The carry is kept live in the
 * C flag across both loops (no flag-clobbering instruction sits between
 * the adcs chain iterations).
 */

.text

.globl _gcry_mpih_add_n
ELF(.type _gcry_mpih_add_n,%function)	/* ELF() hides the directive on non-ELF targets */
_gcry_mpih_add_n:
	and	x5, x5, #0;		/* NOTE(review): upstream reads "and x5, x3, #3" -- see below */
	and	x5, x3, #3;		/* x5 = size % 4: head limbs handled one at a time */
	adds	xzr, xzr, xzr;		/* clear carry flag */

	cbz	x5, .Large_loop;	/* size already a multiple of 4 */

.Loop:	/* one limb per iteration until the remaining count is 4-aligned */
	ldr	x4, [x1], #8;
	sub	x3, x3, #1;		/* sub (not subs): must not disturb C flag */
	ldr	x11, [x2], #8;
	and	x5, x3, #3;
	adcs	x4, x4, x11;		/* *res++ = *s1++ + *s2++ + C */
	str	x4, [x0], #8;
	cbz	x3, .Lend;
	cbnz	x5, .Loop;

.Large_loop:	/* 4 limbs per iteration; loads paired to overlap with the add chain */
	ldp	x4, x6, [x1], #16;
	ldp	x5, x7, [x2], #16;
	ldp	x8, x10, [x1], #16;
	ldp	x9, x11, [x2], #16;
	sub	x3, x3, #4;
	adcs	x4, x4, x5;
	adcs	x6, x6, x7;
	adcs	x8, x8, x9;
	adcs	x10, x10, x11;
	stp	x4, x6, [x0], #16;
	stp	x8, x10, [x0], #16;
	cbnz	x3, .Large_loop;

.Lend:
	adc	x0, xzr, xzr;		/* materialize carry-out as return value */
	ret;
ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;)
/* ARM64 mul_1 -- Multiply a limb vector with a limb and store the result in
 * a second limb vector.
 *
 * Copyright (C) 2013 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "sysdep.h"
#include "asm-syntax.h"
#include "asm-common-aarch64.h"

/*******************
 * mpi_limb_t
 * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,		x0
 *		  mpi_ptr_t s1_ptr,		x1
 *		  mpi_size_t s1_size,		x2
 *		  mpi_limb_t s2_limb)		x3
 *
 * res[i] = s1[i] * s2_limb; returns the final high-limb carry in x0.
 * x4 carries the high half of each product into the next limb.
 */

.text

.globl _gcry_mpih_mul_1
ELF(.type _gcry_mpih_mul_1,%function)	/* ELF() hides the directive on non-ELF targets */
_gcry_mpih_mul_1:
	and	x5, x2, #3;		/* x5 = s1_size % 4 */
	mov	x4, xzr;		/* x4 = incoming carry limb, initially 0 */

	cbz	x5, .Large_loop;

.Loop:	/* one limb per iteration until the remaining count is 4-aligned */
	ldr	x5, [x1], #8;
	sub	x2, x2, #1;
	mul	x9, x5, x3;		/* low 64 bits of s1[i] * limb */
	umulh	x10, x5, x3;		/* high 64 bits of s1[i] * limb */
	and	x5, x2, #3;
	adds	x4, x4, x9;		/* add previous carry into the low half */
	str	x4, [x0], #8;
	adc	x4, x10, xzr;		/* next carry = high half + add's carry-out */

	cbz	x2, .Lend;
	cbnz	x5, .Loop;

.Large_loop:	/* 4 limbs per iteration; mul/umulh interleaved with stores */
	ldp	x5, x6, [x1];
	sub	x2, x2, #4;
	mul	x9, x5, x3;
	ldp	x7, x8, [x1, #16];
	umulh	x10, x5, x3;
	add	x1, x1, #32;
	adds	x4, x4, x9;
	str	x4, [x0], #8;
	mul	x11, x6, x3;
	adc	x4, x10, xzr;
	umulh	x12, x6, x3;
	adds	x4, x4, x11;
	str	x4, [x0], #8;
	mul	x13, x7, x3;
	adc	x4, x12, xzr;
	umulh	x14, x7, x3;
	adds	x4, x4, x13;
	str	x4, [x0], #8;
	mul	x15, x8, x3;
	adc	x4, x14, xzr;
	umulh	x16, x8, x3;
	adds	x4, x4, x15;
	str	x4, [x0], #8;
	adc	x4, x16, xzr;

	cbnz	x2, .Large_loop;

.Lend:
	mov	x0, x4;			/* return the final carry limb */
	ret;
ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;)
/* ARM64 mul_2 -- Multiply a limb vector with a limb and add the result to
 * a second limb vector.
 *
 * Copyright (C) 2013 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "sysdep.h"
#include "asm-syntax.h"
#include "asm-common-aarch64.h"

/*******************
 * mpi_limb_t
 * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,	x0
 *		     mpi_ptr_t s1_ptr,		x1
 *		     mpi_size_t s1_size,	x2
 *		     mpi_limb_t s2_limb)	x3
 *
 * res[i] += s1[i] * s2_limb; returns the final carry limb in x0.
 * Register roles: x6 = running carry limb; x7 = constant zero (set once,
 * never written again) used as a zero operand for adc.
 */

.text

.globl _gcry_mpih_addmul_1
ELF(.type _gcry_mpih_addmul_1,%function)	/* ELF() hides the directive on non-ELF targets */
_gcry_mpih_addmul_1:
	and	x5, x2, #3;		/* x5 = s1_size % 4 */
	mov	x6, xzr;		/* carry limb = 0 */
	mov	x7, xzr;		/* x7 stays 0 for the whole function */

	cbz	x5, .Large_loop;

.Loop:	/* one limb per iteration until the remaining count is 4-aligned */
	ldr	x5, [x1], #8;
	mul	x12, x5, x3;		/* low half of s1[i] * limb */
	ldr	x4, [x0];		/* read-modify-write of res[i] */
	umulh	x13, x5, x3;		/* high half of s1[i] * limb */
	sub	x2, x2, #1;
	adds	x12, x12, x4;		/* low += res[i] */
	and	x5, x2, #3;
	adc	x13, x13, x7;		/* high += carry-out of the add */
	adds	x12, x12, x6;		/* low += previous carry limb */
	str	x12, [x0], #8;
	adc	x6, x7, x13;		/* carry = high + carry-out */

	cbz	x2, .Lend;
	cbnz	x5, .Loop;

.Large_loop:	/* 2x2 limbs per iteration, products interleaved with the adds */
	ldp	x5, x9, [x1], #16;
	sub	x2, x2, #4;
	ldp	x4, x8, [x0];

	mul	x12, x5, x3;
	umulh	x13, x5, x3;
	adds	x12, x12, x4;
	mul	x14, x9, x3;
	adc	x13, x13, x7;
	adds	x12, x12, x6;
	umulh	x15, x9, x3;
	str	x12, [x0], #8;
	adc	x6, x7, x13;
	adds	x14, x14, x8;
	ldp	x5, x9, [x1], #16;
	adc	x15, x15, x7;
	adds	x14, x14, x6;
	mul	x12, x5, x3;
	str	x14, [x0], #8;
	ldp	x4, x8, [x0];
	umulh	x13, x5, x3;
	adc	x6, x7, x15;

	adds	x12, x12, x4;
	mul	x14, x9, x3;
	adc	x13, x13, x7;
	adds	x12, x12, x6;
	umulh	x15, x9, x3;
	str	x12, [x0], #8;
	adc	x6, x7, x13;
	adds	x14, x14, x8;
	adc	x15, x15, x7;
	adds	x14, x14, x6;
	str	x14, [x0], #8;
	adc	x6, x7, x15;

	cbnz	x2, .Large_loop;

.Lend:
	mov	x0, x6;			/* return the final carry limb */
	ret;
ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;)
/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
 * from a second limb vector.
 *
 * Copyright (C) 2013 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "sysdep.h"
#include "asm-syntax.h"
#include "asm-common-aarch64.h"

/*******************
 * mpi_limb_t
 * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,	x0
 *		     mpi_ptr_t s1_ptr,		x1
 *		     mpi_size_t s1_size,	x2
 *		     mpi_limb_t s2_limb)	x3
 *
 * res[i] -= s1[i] * s2_limb; returns the final borrow limb in x0.
 * x7 accumulates the value to subtract from the next limb (product high
 * half plus borrows).  On AArch64 a SUBS that borrows leaves C *clear*,
 * hence the cset/cinc on condition "cc" after each subs.
 */

.text

.globl _gcry_mpih_submul_1
ELF(.type _gcry_mpih_submul_1,%function)	/* ELF() hides the directive on non-ELF targets */
_gcry_mpih_submul_1:
	and	x5, x2, #3;		/* x5 = s1_size % 4 */
	mov	x7, xzr;		/* borrow accumulator = 0 */

	cbz	x5, .Large_loop;

	subs	xzr, xzr, xzr;		/* set C (no borrow) so the first cinc is a no-op */

.Loop:	/* one limb per iteration until the remaining count is 4-aligned */
	ldr	x4, [x1], #8;
	cinc	x7, x7, cc;		/* fold in borrow from previous iteration */
	ldr	x5, [x0];
	sub	x2, x2, #1;		/* sub (not subs): keeps the flags intact */
	mul	x6, x4, x3;		/* low half of s1[i] * limb */
	subs	x5, x5, x7;		/* res[i] -= pending subtrahend */
	umulh	x4, x4, x3;		/* high half of s1[i] * limb */
	and	x10, x2, #3;
	cset	x7, cc;			/* x7 = borrow of that subtraction */
	subs	x5, x5, x6;		/* res[i] -= product low half */
	add	x7, x7, x4;		/* next subtrahend = borrows + product high half */
	str	x5, [x0], #8;
	cbz	x2, .Loop_end;
	cbnz	x10, .Loop;

	cinc	x7, x7, cc;		/* carry the last borrow into the unrolled loop */

.Large_loop:	/* 4 limbs per iteration (2x the two-limb pattern) */
	ldp	x4, x8, [x1], #16;
	sub	x2, x2, #4;
	ldp	x5, x9, [x0];
	mul	x6, x4, x3;
	subs	x5, x5, x7;
	umulh	x4, x4, x3;
	cset	x7, cc;
	subs	x5, x5, x6;
	mul	x6, x8, x3;
	add	x7, x7, x4;
	str	x5, [x0], #8;
	cinc	x7, x7, cc;
	umulh	x8, x8, x3;
	subs	x9, x9, x7;
	cset	x7, cc;
	subs	x9, x9, x6;
	ldp	x4, x10, [x1], #16;
	str	x9, [x0], #8;
	add	x7, x7, x8;
	ldp	x5, x9, [x0];
	cinc	x7, x7, cc;
	mul	x6, x4, x3;
	subs	x5, x5, x7;
	umulh	x4, x4, x3;
	cset	x7, cc;
	subs	x5, x5, x6;
	mul	x6, x10, x3;
	add	x7, x7, x4;
	str	x5, [x0], #8;
	cinc	x7, x7, cc;
	umulh	x10, x10, x3;
	subs	x9, x9, x7;
	cset	x7, cc;
	subs	x9, x9, x6;
	add	x7, x7, x10;
	str	x9, [x0], #8;
	cinc	x7, x7, cc;		/* flags already folded in: x7 is the full borrow here */

	cbnz	x2, .Large_loop;

	mov	x0, x7;			/* return the final borrow limb */
	ret;

.Loop_end:	/* exit from .Loop: last subs' borrow is still only in the flags */
	cinc	x0, x7, cc;
	ret;
ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;)
/* ARM64 sub_n -- Subtract two limb vectors of the same length > 0 and store
 * sum in a third limb vector.
 *
 * Copyright (C) 2013 Jussi Kivilinna
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "sysdep.h"
#include "asm-syntax.h"
#include "asm-common-aarch64.h"

/*******************
 * mpi_limb_t
 * _gcry_mpih_sub_n( mpi_ptr_t res_ptr,		x0
 *		   mpi_ptr_t s1_ptr,		x1
 *		   mpi_ptr_t s2_ptr,		x2
 *		   mpi_size_t size)		x3
 *
 * res[i] = s1[i] - s2[i]; returns the final borrow (0 or 1) in x0.
 * The borrow lives in the (inverted) C flag across both loops: AArch64
 * SBCS borrows when C is clear, so C must start set.
 */

.text

.globl _gcry_mpih_sub_n
ELF(.type _gcry_mpih_sub_n,%function)	/* ELF() hides the directive on non-ELF targets */
_gcry_mpih_sub_n:
	and	x5, x3, #3;		/* x5 = size % 4 */
	subs	xzr, xzr, xzr;		/* prepare carry flag for sub (sets C = no borrow) */

	cbz	x5, .Large_loop;

.Loop:	/* one limb per iteration until the remaining count is 4-aligned */
	ldr	x4, [x1], #8;
	sub	x3, x3, #1;		/* sub (not subs): must not disturb the C flag */
	ldr	x11, [x2], #8;
	and	x5, x3, #3;
	sbcs	x4, x4, x11;		/* *res++ = *s1++ - *s2++ - borrow */
	str	x4, [x0], #8;
	cbz	x3, .Lend;
	cbnz	x5, .Loop;

.Large_loop:	/* 4 limbs per iteration; loads paired to overlap with the sbcs chain */
	ldp	x4, x6, [x1], #16;
	ldp	x5, x7, [x2], #16;
	ldp	x8, x10, [x1], #16;
	ldp	x9, x11, [x2], #16;
	sub	x3, x3, #4;
	sbcs	x4, x4, x5;
	sbcs	x6, x6, x7;
	sbcs	x8, x8, x9;
	sbcs	x10, x10, x11;
	stp	x4, x6, [x0], #16;
	stp	x8, x10, [x0], #16;
	cbnz	x3, .Large_loop;

.Lend:
	cset	x0, cc;			/* C clear after sbcs chain => a borrow occurred */
	ret;
ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;)
/* asm-common-aarch64.h - Common macros for AArch64 assembly
 *
 * Copyright (C) 2018 Martin Storsjö
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef MPI_ASM_COMMON_AARCH64_H
#define MPI_ASM_COMMON_AARCH64_H

/* ELF(x...): expand to its arguments only when assembling for an ELF
 * target.  Used to wrap ELF-only assembler directives such as .type and
 * .size, which non-ELF toolchains (e.g. Windows PE/COFF) reject. */
#ifdef __ELF__
# define ELF(...) __VA_ARGS__
#else
# define ELF(...) /*_*/
#endif

#endif /* MPI_ASM_COMMON_AARCH64_H */