diff --git a/mpi/config.links b/mpi/config.links index e4fc4fc4..deb98bf0 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -1,470 +1,470 @@ # config.links - helper for ../configure -*- mode: sh -*- # Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. # Copyright (C) 2012 g10 Code GmbH # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA # # sourced by ../configure to get the list of files to link # this should set $mpi_ln_list. # Note: this is called from the above directory. # # Reguired variables: # $ac_cv_sys_symbol_underscore # $gcry_cv_gcc_arm_platform_as_ok mpi_sflags= mpi_extra_modules= mpi_cpu_arch= test -d ./mpi || mkdir ./mpi # We grep the list of modules from the Makefile so that # we don't need to maintain them here. 
mpi_standard_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { if( $3 != "O" ) print $2 }' $srcdir/mpi/Makefile.am` mpi_optional_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { if( $3 == "O" ) print $2 }' $srcdir/mpi/Makefile.am` echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h echo "/* Host: ${host} */" >>./mpi/asm-syntax.h case "${host}" in i[34567]86*-*-openbsd[12]* | \ i[34567]86*-*-openbsd3.[0123]*) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="x86" ;; i[3467]86*-*-openbsd* | \ i[3467]86*-*-freebsd*-elf | \ i[3467]86*-*-freebsd[3-9]* | \ i[3467]86*-*-freebsd[12][0-9]*| \ i[3467]86*-*-freebsdelf* | \ i[3467]86*-*-netbsd* | \ i[3467]86*-*-k*bsd*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" mpi_cpu_arch="x86" ;; i586*-*-openbsd* | \ i586*-*-freebsd*-elf | \ i586*-*-freebsd[3-9]* | \ i586*-*-freebsd[12][0-9]*| \ i586*-*-freebsdelf* | \ i586*-*-netbsd* | \ i586*-*-k*bsd* | \ pentium-*-netbsd* | \ pentiumpro-*-netbsd*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h - path="i586 i386" + path="i386" mpi_cpu_arch="x86" ;; i[34]86*-*-bsdi4*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" mpi_cpu_arch="x86" ;; i[3467]86*-*-linuxaout* | \ i[3467]86*-*-linuxoldld* | \ i[3467]86*-*-*bsd*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" mpi_cpu_arch="x86" ;; i586*-*-linuxaout* | \ i586*-*-linuxoldld* | \ i586*-*-*bsd*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h - path="i586 i386" + path="i386" mpi_cpu_arch="x86" ;; i[3467]86*-msdosdjgpp* | \ i[34]86*-apple-darwin*) echo '#define BSD_SYNTAX' 
>>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" mpi_cpu_arch="x86" ;; i586*-msdosdjgpp* | \ i[567]86*-apple-darwin*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h - path="i586 i386" + path="i386" mpi_cpu_arch="x86" ;; i[3467]86*-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" mpi_cpu_arch="x86" ;; i586*-*-* | \ pentium-*-* | \ pentiumpro-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h - path="i586 i386" + path="i386" mpi_cpu_arch="x86" ;; x86_64-apple-darwin*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h path="amd64" mpi_cpu_arch="x86" ;; x86_64-*mingw32*) echo '#define USE_MS_ABI' >>./mpi/asm-syntax.h echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h path="amd64" mpi_cpu_arch="x86" ;; x86_64-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h path="amd64" mpi_cpu_arch="x86" ;; alpha*-*-*) echo '/* configured for alpha */' >>./mpi/asm-syntax.h path="alpha" mpi_extra_modules="udiv-qrnnd" mpi_cpu_arch="alpha" ;; aarch64-*-*) echo '/* configured for aarch64 */' >>./mpi/asm-syntax.h path="aarch64" mpi_cpu_arch="aarch64" ;; arm*-*-*) mpi_cpu_arch="arm" if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then echo '/* configured for arm */' >>./mpi/asm-syntax.h path="arm" else echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h path="" fi ;; hppa7000*-*-*) echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h path="hppa1.1 hppa" mpi_extra_modules="udiv-qrnnd" mpi_cpu_arch="hppa" ;; hppa1.0*-*-*) echo '/* configured for HPPA 1.0 */' 
>>./mpi/asm-syntax.h path="hppa" mpi_extra_modules="udiv-qrnnd" mpi_cpu_arch="hppa" ;; hppa*-*-*) # assume pa7100 echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h path="pa7100 hppa1.1 hppa" mpi_extra_modules="udiv-qrnnd" mpi_cpu_arch="hppa" ;; sparc64-*-linux-gnu) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="sparc" ;; sparc64-sun-solaris2*) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="sparc" ;; sparc64-*-netbsd* | sparc64-*-freebsd* | sparc64-*-openbsd*) # There are no sparc64 assembler modules that work on the # *BSDs, so use the generic C functions. echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="sparc" ;; sparc64*-*-*) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="sparc" ;; sparc9*-*-* | \ ultrasparc*-*-* ) echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h path="sparc32v8 sparc32" mpi_cpu_arch="sparc" ;; sparc8*-*-* | \ microsparc*-*-*) echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h path="sparc32v8 sparc32" mpi_cpu_arch="sparc" ;; supersparc*-*-*) echo '/* configured for supersparc */' >>./mpi/asm-syntax.h path="supersparc sparc32v8 sparc32" mpi_extra_modules="udiv" mpi_cpu_arch="sparc" ;; sparc*-*-*) echo '/* configured for sparc */' >>./mpi/asm-syntax.h path="sparc32" mpi_extra_modules="udiv" mpi_cpu_arch="sparc" ;; mips[34]*-*-* | \ mips*-*-irix6*) echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h path="mips3" mpi_cpu_arch="mips" ;; mips*-*-*) echo '/* configured for MIPS2 */' >>./mpi/asm-syntax.h path="mips2" mpi_cpu_arch="mips" ;; s390x*-*-*) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="s390x" ;; # Motorola 68k configurations. Let m68k mean 68020-68040. 
# mc68000 or mc68060 configurations need to be specified explicitly m680[234]0*-*-linuxaout* | \ m68k*-*-linuxaout*) echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k/mc68020 m68k" mpi_cpu_arch="m68k" ;; m68060*-*-linuxaout*) echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k" mpi_cpu_arch="m68k" ;; m680[234]0*-*-linux* | \ m68k*-*-linux*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h mpi_cpu_arch="m68k" ;; m68060*-*-linux*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k" mpi_cpu_arch="m68k" ;; m68k-atari-mint) echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k" mpi_cpu_arch="m68k" ;; m68000*-*-* | \ m68060*-*-*) echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k/mc68000" mpi_cpu_arch="m68k" ;; m680[234]0*-*-* | \ m68k*-*-*) echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h path="m68k/mc68020 m68k" mpi_cpu_arch="m68k" ;; powerpc-apple-darwin*) echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h path="" mpi_cpu_arch="ppc" ;; powerpc*-*-netbsd* | powerpc*-*-openbsd*) echo '/* configured {Open,Net}BSD on powerpc */' >>./mpi/asm-syntax.h echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h mpi_sflags="-Wa,-mppc" path="powerpc32" mpi_cpu_arch="ppc" ;; ppc620-*-* | \ powerpc64*-*-*) mpi_sflags="-Wa,-mppc" path="powerpc64" mpi_cpu_arch="ppc" ;; powerpc*-*-linux*) echo '/* configured for powerpc/ELF */' >>./mpi/asm-syntax.h echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h path="powerpc32" mpi_cpu_arch="ppc" ;; rs6000-*-aix[456789]* | \ rs6000-*-aix3.2.[456789]) 
mpi_sflags="-Wa,-mpwr" path="power" mpi_extra_modules="udiv-w-sdiv" mpi_cpu_arch="ppc" ;; rs6000-*-* | \ power-*-* | \ power2-*-*) mpi_sflags="-Wa,-mppc" path="power" mpi_extra_modules="udiv-w-sdiv" mpi_cpu_arch="ppc" ;; powerpc-ibm-aix4.2.* ) # I am not sure about this one but a machine identified by # powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code. mpi_sflags="-Wa,-mpwr" path="power" mpi_extra_modules="udiv-w-sdiv" mpi_cpu_arch="ppc" ;; ppc601-*-*) mpi_sflags="-Wa,-mppc" path="power powerpc32" mpi_cpu_arch="ppc" ;; ppc60[234]*-*-*) mpi_sflags="-Wa,-mppc" path="powerpc32" mpi_cpu_arch="ppc" ;; powerpc*-*-*) mpi_sflags="-Wa,-mppc" path="powerpc32" mpi_cpu_arch="ppc" ;; *) echo '/* Platform not known */' >>./mpi/asm-syntax.h path="" ;; esac # If asm modules are disabled reset the found variables but keep # mpi_cpu_arch. if test "$try_asm_modules" != "yes" ; then echo '/* Assembler modules disabled on request */' >./mpi/asm-syntax.h path="" mpi_sflags="" mpi_extra_modules="" mpi_cpu_arch="disabled" fi # Make sure that mpi_cpu_arch is not the empty string. if test x"$mpi_cpu_arch" = x ; then mpi_cpu_arch="unknown" fi # Add .note.gnu.property section for Intel CET in assembler sources # when CET is enabled. 
*/ if test x"$mpi_cpu_arch" = xx86 ; then cat <<EOF >> ./mpi/asm-syntax.h #if defined(__ASSEMBLER__) && defined(__CET__) # include <cet.h> #endif EOF fi # Make sysdep.h echo '/* created by config.links - do not edit */' >./mpi/sysdep.h if test x$ac_cv_sys_symbol_underscore = xyes; then cat <<EOF >>./mpi/sysdep.h #if __STDC__ #define C_SYMBOL_NAME(name) _##name #else #define C_SYMBOL_NAME(name) _/**/name #endif EOF else cat <<EOF >>./mpi/sysdep.h #define C_SYMBOL_NAME(name) name EOF fi # Figure the required modules out mpi_required_modules=$mpi_standard_modules if test "$mpi_extra_modules" != ""; then for fn in $mpi_extra_modules; do for i in $mpi_optional_modules; do if test "$fn" = "$i" ; then mpi_required_modules="$mpi_required_modules $fn" fi done done fi # Try to get file to link from the assembler subdirectory and # if this fails get it from the generic subdirectory. mpi_ln_list= mpi_mod_list= path=`echo "$mpi_extra_path $path generic" | tr ':' ' '` echo '/* Created by config.links - do not edit */' >./mpi/mod-source-info.h echo "/* Host: ${host} */" >>./mpi/mod-source-info.h echo "static char mod_source_info[] =" >>./mpi/mod-source-info.h for fn in $mpi_required_modules ; do fnu=`echo $fn | sed 's/-/_/g'` eval mpi_mod_c_${fnu}=no eval mpi_mod_asm_${fnu}=no for dir in $path ; do rm -f $srcdir/mpi/$fn.[Sc] if test -f $srcdir/mpi/$dir/$fn.S ; then echo " \":$dir/$fn.S\"" >>./mpi/mod-source-info.h mpi_ln_list="$mpi_ln_list mpi/$fn-asm.S:mpi/$dir/$fn.S" eval mpi_mod_asm_${fnu}=yes mpi_mod_list="$mpi_mod_list $fn" break; elif test -f $srcdir/mpi/$dir/$fn.c ; then echo " \":$dir/$fn.c\"" >>./mpi/mod-source-info.h mpi_ln_list="$mpi_ln_list mpi/$fn.c:mpi/$dir/$fn.c" eval mpi_mod_c_${fnu}=yes mpi_mod_list="$mpi_mod_list $fn" break; fi done done echo " ;" >>./mpi/mod-source-info.h # Same thing for the file which defines the limb size path=`echo "$path generic" | tr ':' ' '` for dir in $path ; do rm -f $srcdir/mpi/mpi-asm-defs.h if test -f $srcdir/mpi/$dir/mpi-asm-defs.h ; then 
mpi_ln_list="$mpi_ln_list mpi/mpi-asm-defs.h:mpi/$dir/mpi-asm-defs.h" break; fi done diff --git a/mpi/i586/README b/mpi/i586/README deleted file mode 100644 index d73b0826..00000000 --- a/mpi/i586/README +++ /dev/null @@ -1,26 +0,0 @@ -This directory contains mpn functions optimized for Intel Pentium -processors. - -RELEVANT OPTIMIZATION ISSUES - -1. Pentium doesn't allocate cache lines on writes, unlike most other modern -processors. Since the functions in the mpn class do array writes, we have to -handle allocating the destination cache lines by reading a word from it in the -loops, to achieve the best performance. - -2. Pairing of memory operations requires that the two issued operations refer -to different cache banks. The simplest way to insure this is to read/write -two words from the same object. If we make operations on different objects, -they might or might not be to the same cache bank. - -STATUS - -1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium -documentation indicates that they should take only 43/8 = 5.375 cycles/limb, -or 5 cycles/limb asymptotically. - -2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop -overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. - -3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they -should... diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles deleted file mode 100644 index 8f821fbf..00000000 --- a/mpi/i586/distfiles +++ /dev/null @@ -1,9 +0,0 @@ -mpih-add1.S -mpih-mul1.S -mpih-mul2.S -mpih-mul3.S -mpih-lshift.S -mpih-rshift.S -mpih-sub1.S -README - diff --git a/mpi/i586/mpih-add1.S b/mpi/i586/mpih-add1.S deleted file mode 100644 index 7436d592..00000000 --- a/mpi/i586/mpih-add1.S +++ /dev/null @@ -1,135 +0,0 @@ -/* i80586 add_n -- Add two limb vectors of the same length > 0 and store - * sum in a third limb vector. - * - * Copyright (C) 1992, 1994, 1995, 1996, 1998, - * 2001, 2002 Free Software Foundation, Inc. 
- * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - */ - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_add_n) -C_SYMBOL_NAME(_gcry_mpih_add_n:) - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp - - movl 20(%esp),%edi /* res_ptr */ - movl 24(%esp),%esi /* s1_ptr */ - movl 28(%esp),%ebp /* s2_ptr */ - movl 32(%esp),%ecx /* size */ - - movl (%ebp),%ebx - - decl %ecx - movl %ecx,%edx - shrl $3,%ecx - andl $7,%edx - testl %ecx,%ecx /* zero carry flag */ - jz Lend - pushl %edx - - ALIGN (3) -Loop: movl 28(%edi),%eax /* fetch destination cache line */ - leal 32(%edi),%edi - -L1: movl (%esi),%eax - movl 4(%esi),%edx - adcl %ebx,%eax - movl 4(%ebp),%ebx - adcl %ebx,%edx - movl 8(%ebp),%ebx - movl %eax,-32(%edi) - movl %edx,-28(%edi) - -L2: movl 8(%esi),%eax - movl 12(%esi),%edx - adcl %ebx,%eax - movl 12(%ebp),%ebx - adcl %ebx,%edx - movl 16(%ebp),%ebx - movl %eax,-24(%edi) - movl %edx,-20(%edi) - -L3: movl 16(%esi),%eax - movl 20(%esi),%edx - adcl %ebx,%eax - movl 20(%ebp),%ebx - adcl %ebx,%edx - movl 
24(%ebp),%ebx - movl %eax,-16(%edi) - movl %edx,-12(%edi) - -L4: movl 24(%esi),%eax - movl 28(%esi),%edx - adcl %ebx,%eax - movl 28(%ebp),%ebx - adcl %ebx,%edx - movl 32(%ebp),%ebx - movl %eax,-8(%edi) - movl %edx,-4(%edi) - - leal 32(%esi),%esi - leal 32(%ebp),%ebp - decl %ecx - jnz Loop - - popl %edx -Lend: - decl %edx /* test %edx w/o clobbering carry */ - js Lend2 - incl %edx -Loop2: - leal 4(%edi),%edi - movl (%esi),%eax - adcl %ebx,%eax - movl 4(%ebp),%ebx - movl %eax,-4(%edi) - leal 4(%esi),%esi - leal 4(%ebp),%ebp - decl %edx - jnz Loop2 -Lend2: - movl (%esi),%eax - adcl %ebx,%eax - movl %eax,(%edi) - - sbbl %eax,%eax - negl %eax - - popl %ebp - popl %ebx - popl %esi - popl %edi - ret - - diff --git a/mpi/i586/mpih-lshift.S b/mpi/i586/mpih-lshift.S deleted file mode 100644 index 9d25fe9d..00000000 --- a/mpi/i586/mpih-lshift.S +++ /dev/null @@ -1,229 +0,0 @@ -/* i80586 lshift - * - * Copyright (C) 1992, 1994, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. 
- * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) - * mpi_ptr_t up, (sp + 8) - * mpi_size_t usize, (sp + 12) - * unsigned cnt) (sp + 16) - */ - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_lshift) -C_SYMBOL_NAME(_gcry_mpih_lshift:) - - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp - - movl 20(%esp),%edi /* res_ptr */ - movl 24(%esp),%esi /* s_ptr */ - movl 28(%esp),%ebp /* size */ - movl 32(%esp),%ecx /* cnt */ - -/* We can use faster code for shift-by-1 under certain conditions. */ - cmp $1,%ecx - jne Lnormal - leal 4(%esi),%eax - cmpl %edi,%eax - jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ - leal (%esi,%ebp,4),%eax - cmpl %eax,%edi - jnc Lspecial /* jump if res_ptr >= s_ptr + size */ - -Lnormal: - leal -4(%edi,%ebp,4),%edi - leal -4(%esi,%ebp,4),%esi - - movl (%esi),%edx - subl $4,%esi - xorl %eax,%eax - shldl %cl,%edx,%eax /* compute carry limb */ - pushl %eax /* push carry limb onto stack */ - - decl %ebp - pushl %ebp - shrl $3,%ebp - jz Lend - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -Loop: movl -28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebx - - movl (%esi),%eax - movl -4(%esi),%edx - shldl %cl,%eax,%ebx - shldl %cl,%edx,%eax - movl %ebx,(%edi) - movl %eax,-4(%edi) - - movl -8(%esi),%ebx - movl -12(%esi),%eax - shldl %cl,%ebx,%edx - shldl %cl,%eax,%ebx - movl %edx,-8(%edi) - movl %ebx,-12(%edi) - - movl -16(%esi),%edx - movl -20(%esi),%ebx - shldl %cl,%edx,%eax - shldl %cl,%ebx,%edx - movl %eax,-16(%edi) - movl %edx,-20(%edi) - - movl -24(%esi),%eax - movl -28(%esi),%edx - shldl %cl,%eax,%ebx - shldl %cl,%edx,%eax - movl %ebx,-24(%edi) - movl %eax,-28(%edi) - - 
subl $32,%esi - subl $32,%edi - decl %ebp - jnz Loop - -Lend: popl %ebp - andl $7,%ebp - jz Lend2 -Loop2: movl (%esi),%eax - shldl %cl,%eax,%edx - movl %edx,(%edi) - movl %eax,%edx - subl $4,%esi - subl $4,%edi - decl %ebp - jnz Loop2 - -Lend2: shll %cl,%edx /* compute least significant limb */ - movl %edx,(%edi) /* store it */ - - popl %eax /* pop carry limb */ - - popl %ebp - popl %ebx - popl %esi - popl %edi - ret - -/* We loop from least significant end of the arrays, which is only - permissable if the source and destination don't overlap, since the - function is documented to work for overlapping source and destination. -*/ - -Lspecial: - movl (%esi),%edx - addl $4,%esi - - decl %ebp - pushl %ebp - shrl $3,%ebp - - addl %edx,%edx - incl %ebp - decl %ebp - jz LLend - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -LLoop: movl 28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebx - - movl (%esi),%eax - movl 4(%esi),%edx - adcl %eax,%eax - movl %ebx,(%edi) - adcl %edx,%edx - movl %eax,4(%edi) - - movl 8(%esi),%ebx - movl 12(%esi),%eax - adcl %ebx,%ebx - movl %edx,8(%edi) - adcl %eax,%eax - movl %ebx,12(%edi) - - movl 16(%esi),%edx - movl 20(%esi),%ebx - adcl %edx,%edx - movl %eax,16(%edi) - adcl %ebx,%ebx - movl %edx,20(%edi) - - movl 24(%esi),%eax - movl 28(%esi),%edx - adcl %eax,%eax - movl %ebx,24(%edi) - adcl %edx,%edx - movl %eax,28(%edi) - - leal 32(%esi),%esi /* use leal not to clobber carry */ - leal 32(%edi),%edi - decl %ebp - jnz LLoop - -LLend: popl %ebp - sbbl %eax,%eax /* save carry in %eax */ - andl $7,%ebp - jz LLend2 - addl %eax,%eax /* restore carry from eax */ -LLoop2: movl %edx,%ebx - movl (%esi),%edx - adcl %edx,%edx - movl %ebx,(%edi) - - leal 4(%esi),%esi /* use leal not to clobber carry */ - leal 4(%edi),%edi - decl %ebp - jnz LLoop2 - - jmp LL1 -LLend2: addl %eax,%eax /* restore carry from eax */ -LL1: movl %edx,(%edi) /* store last limb */ - - sbbl %eax,%eax - negl %eax - - popl %ebp - popl %ebx - popl 
%esi - popl %edi - ret - - diff --git a/mpi/i586/mpih-mul1.S b/mpi/i586/mpih-mul1.S deleted file mode 100644 index 3601d968..00000000 --- a/mpi/i586/mpih-mul1.S +++ /dev/null @@ -1,89 +0,0 @@ -/* i80586 mul_1 -- Multiply a limb vector with a limb and store - * the result in a second limb vector. - * - * Copyright (C) 1992, 1994, 1996, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - */ - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebp - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) -C_SYMBOL_NAME(_gcry_mpih_mul_1:) - - INSN1(push,l ,R(edi)) - INSN1(push,l ,R(esi)) - INSN1(push,l ,R(ebx)) - INSN1(push,l ,R(ebp)) - - INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) - INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) - INSN2(mov,l ,R(size),MEM_DISP(esp,28)) - INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) - - INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) - INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) - INSN1(neg,l ,R(size)) - INSN2(xor,l ,R(ebx),R(ebx)) - ALIGN (3) - -Loop: INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) - - INSN1(mul,l ,R(s2_limb)) - - INSN2(add,l ,R(ebx),R(eax)) - - INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) - INSN1(inc,l ,R(size)) - - INSN2(mov,l ,R(ebx),R(edx)) - INSN1(jnz, ,Loop) - - INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),R(ebx)) - INSN1(pop,l ,R(ebp)) - INSN1(pop,l ,R(ebx)) - INSN1(pop,l ,R(esi)) - INSN1(pop,l ,R(edi)) - ret - diff --git a/mpi/i586/mpih-mul2.S b/mpi/i586/mpih-mul2.S deleted file mode 100644 index f32d363a..00000000 --- a/mpi/i586/mpih-mul2.S +++ /dev/null @@ -1,93 +0,0 @@ -/* i80586 addmul_1 -- Multiply a limb vector with a limb and add - * the result to a second limb vector. - * - * Copyright (C) 1992, 1994, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. 
- * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - */ - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebp - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) -C_SYMBOL_NAME(_gcry_mpih_addmul_1:) - - INSN1(push,l ,R(edi)) - INSN1(push,l ,R(esi)) - INSN1(push,l ,R(ebx)) - INSN1(push,l ,R(ebp)) - - INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) - INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) - INSN2(mov,l ,R(size),MEM_DISP(esp,28)) - INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) - - INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) - INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) - INSN1(neg,l ,R(size)) - INSN2(xor,l ,R(ebx),R(ebx)) - ALIGN (3) - -Loop: INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) - - INSN1(mul,l ,R(s2_limb)) - - INSN2(add,l ,R(eax),R(ebx)) - INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) - - INSN2(adc,l ,R(edx),$0) - INSN2(add,l ,R(ebx),R(eax)) - - INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) - INSN1(inc,l ,R(size)) - - 
INSN2(mov,l ,R(ebx),R(edx)) - INSN1(jnz, ,Loop) - - INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),R(ebx)) - INSN1(pop,l ,R(ebp)) - INSN1(pop,l ,R(ebx)) - INSN1(pop,l ,R(esi)) - INSN1(pop,l ,R(edi)) - ret - diff --git a/mpi/i586/mpih-mul3.S b/mpi/i586/mpih-mul3.S deleted file mode 100644 index fa27d4e1..00000000 --- a/mpi/i586/mpih-mul3.S +++ /dev/null @@ -1,93 +0,0 @@ -/* i80586 submul_1 -- Multiply a limb vector with a limb and add - * the result to a second limb vector. - * - * Copyright (C) 1992, 1994, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - */ - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebp - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) -C_SYMBOL_NAME(_gcry_mpih_submul_1:) - - INSN1(push,l ,R(edi)) - INSN1(push,l ,R(esi)) - INSN1(push,l ,R(ebx)) - INSN1(push,l ,R(ebp)) - - INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) - INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) - INSN2(mov,l ,R(size),MEM_DISP(esp,28)) - INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) - - INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) - INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) - INSN1(neg,l ,R(size)) - INSN2(xor,l ,R(ebx),R(ebx)) - ALIGN (3) - -Loop: INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) - - INSN1(mul,l ,R(s2_limb)) - - INSN2(add,l ,R(eax),R(ebx)) - INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) - - INSN2(adc,l ,R(edx),$0) - INSN2(sub,l ,R(ebx),R(eax)) - - INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) - INSN1(inc,l ,R(size)) - - INSN2(mov,l ,R(ebx),R(edx)) - INSN1(jnz, ,Loop) - - INSN2(adc,l ,R(ebx),$0) - INSN2(mov,l ,R(eax),R(ebx)) - INSN1(pop,l ,R(ebp)) - INSN1(pop,l ,R(ebx)) - INSN1(pop,l ,R(esi)) - INSN1(pop,l ,R(edi)) - ret - diff --git a/mpi/i586/mpih-rshift.S b/mpi/i586/mpih-rshift.S deleted file mode 100644 index c661e3d3..00000000 --- a/mpi/i586/mpih-rshift.S +++ /dev/null @@ -1,228 +0,0 @@ -/* i80586 rshift - * - * Copyright (C) 1992, 1994, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. 
- * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - - -/******************* - * mpi_limb_t - * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) - * mpi_ptr_t up, (sp + 8) - * mpi_size_t usize, (sp + 12) - * unsigned cnt) (sp + 16) - */ - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_rshift) -C_SYMBOL_NAME(_gcry_mpih_rshift:) - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp - - movl 20(%esp),%edi /* res_ptr */ - movl 24(%esp),%esi /* s_ptr */ - movl 28(%esp),%ebp /* size */ - movl 32(%esp),%ecx /* cnt */ - -/* We can use faster code for shift-by-1 under certain conditions. 
*/ - cmp $1,%ecx - jne Rnormal - leal 4(%edi),%eax - cmpl %esi,%eax - jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */ - leal (%edi,%ebp,4),%eax - cmpl %eax,%esi - jnc Rspecial /* jump if s_ptr >= res_ptr + size */ - -Rnormal: - movl (%esi),%edx - addl $4,%esi - xorl %eax,%eax - shrdl %cl,%edx,%eax /* compute carry limb */ - pushl %eax /* push carry limb onto stack */ - - decl %ebp - pushl %ebp - shrl $3,%ebp - jz Rend - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -Roop: movl 28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebx - - movl (%esi),%eax - movl 4(%esi),%edx - shrdl %cl,%eax,%ebx - shrdl %cl,%edx,%eax - movl %ebx,(%edi) - movl %eax,4(%edi) - - movl 8(%esi),%ebx - movl 12(%esi),%eax - shrdl %cl,%ebx,%edx - shrdl %cl,%eax,%ebx - movl %edx,8(%edi) - movl %ebx,12(%edi) - - movl 16(%esi),%edx - movl 20(%esi),%ebx - shrdl %cl,%edx,%eax - shrdl %cl,%ebx,%edx - movl %eax,16(%edi) - movl %edx,20(%edi) - - movl 24(%esi),%eax - movl 28(%esi),%edx - shrdl %cl,%eax,%ebx - shrdl %cl,%edx,%eax - movl %ebx,24(%edi) - movl %eax,28(%edi) - - addl $32,%esi - addl $32,%edi - decl %ebp - jnz Roop - -Rend: popl %ebp - andl $7,%ebp - jz Rend2 -Roop2: movl (%esi),%eax - shrdl %cl,%eax,%edx /* compute result limb */ - movl %edx,(%edi) - movl %eax,%edx - addl $4,%esi - addl $4,%edi - decl %ebp - jnz Roop2 - -Rend2: shrl %cl,%edx /* compute most significant limb */ - movl %edx,(%edi) /* store it */ - - popl %eax /* pop carry limb */ - - popl %ebp - popl %ebx - popl %esi - popl %edi - ret - -/* We loop from least significant end of the arrays, which is only - permissable if the source and destination don't overlap, since the - function is documented to work for overlapping source and destination. 
-*/ - -Rspecial: - leal -4(%edi,%ebp,4),%edi - leal -4(%esi,%ebp,4),%esi - - movl (%esi),%edx - subl $4,%esi - - decl %ebp - pushl %ebp - shrl $3,%ebp - - shrl $1,%edx - incl %ebp - decl %ebp - jz RLend - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -RLoop: movl -28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebx - - movl (%esi),%eax - movl -4(%esi),%edx - rcrl $1,%eax - movl %ebx,(%edi) - rcrl $1,%edx - movl %eax,-4(%edi) - - movl -8(%esi),%ebx - movl -12(%esi),%eax - rcrl $1,%ebx - movl %edx,-8(%edi) - rcrl $1,%eax - movl %ebx,-12(%edi) - - movl -16(%esi),%edx - movl -20(%esi),%ebx - rcrl $1,%edx - movl %eax,-16(%edi) - rcrl $1,%ebx - movl %edx,-20(%edi) - - movl -24(%esi),%eax - movl -28(%esi),%edx - rcrl $1,%eax - movl %ebx,-24(%edi) - rcrl $1,%edx - movl %eax,-28(%edi) - - leal -32(%esi),%esi /* use leal not to clobber carry */ - leal -32(%edi),%edi - decl %ebp - jnz RLoop - -RLend: popl %ebp - sbbl %eax,%eax /* save carry in %eax */ - andl $7,%ebp - jz RLend2 - addl %eax,%eax /* restore carry from eax */ -RLoop2: movl %edx,%ebx - movl (%esi),%edx - rcrl $1,%edx - movl %ebx,(%edi) - - leal -4(%esi),%esi /* use leal not to clobber carry */ - leal -4(%edi),%edi - decl %ebp - jnz RLoop2 - - jmp RL1 -RLend2: addl %eax,%eax /* restore carry from eax */ -RL1: movl %edx,(%edi) /* store last limb */ - - movl $0,%eax - rcrl $1,%eax - - popl %ebp - popl %ebx - popl %esi - popl %edi - ret - diff --git a/mpi/i586/mpih-sub1.S b/mpi/i586/mpih-sub1.S deleted file mode 100644 index ef2d5807..00000000 --- a/mpi/i586/mpih-sub1.S +++ /dev/null @@ -1,142 +0,0 @@ -/* i80586 sub_n -- Sub two limb vectors of the same length > 0 and store - * sum in a third limb vector. - * - * Copyright (C) 1992, 1994, 1995, 1998, - * 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. 
- * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - */ - - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) -C_SYMBOL_NAME(_gcry_mpih_sub_n:) - - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp - - movl 20(%esp),%edi /* res_ptr */ - movl 24(%esp),%esi /* s1_ptr */ - movl 28(%esp),%ebp /* s2_ptr */ - movl 32(%esp),%ecx /* size */ - - movl (%ebp),%ebx - - decl %ecx - movl %ecx,%edx - shrl $3,%ecx - andl $7,%edx - testl %ecx,%ecx /* zero carry flag */ - jz Lend - pushl %edx - - ALIGN (3) -Loop: movl 28(%edi),%eax /* fetch destination cache line */ - leal 32(%edi),%edi - -L1: movl (%esi),%eax - movl 4(%esi),%edx - sbbl %ebx,%eax - movl 4(%ebp),%ebx - sbbl %ebx,%edx - movl 8(%ebp),%ebx - movl %eax,-32(%edi) - movl %edx,-28(%edi) - -L2: movl 8(%esi),%eax - movl 12(%esi),%edx - sbbl %ebx,%eax - movl 12(%ebp),%ebx - sbbl %ebx,%edx - movl 16(%ebp),%ebx - movl %eax,-24(%edi) - movl %edx,-20(%edi) - -L3: movl 16(%esi),%eax - movl 20(%esi),%edx - sbbl %ebx,%eax - movl 20(%ebp),%ebx - sbbl %ebx,%edx - movl 24(%ebp),%ebx - movl %eax,-16(%edi) - movl %edx,-12(%edi) - -L4: movl 24(%esi),%eax - movl 28(%esi),%edx - sbbl %ebx,%eax - movl 28(%ebp),%ebx - sbbl %ebx,%edx - movl 32(%ebp),%ebx - movl %eax,-8(%edi) - movl %edx,-4(%edi) - - leal 32(%esi),%esi - leal 32(%ebp),%ebp - decl %ecx - jnz Loop - - popl %edx -Lend: - decl %edx /* test %edx w/o clobbering carry */ - js Lend2 - incl %edx -Loop2: - leal 4(%edi),%edi - movl (%esi),%eax - sbbl %ebx,%eax - movl 4(%ebp),%ebx - movl %eax,-4(%edi) - leal 4(%esi),%esi - leal 4(%ebp),%ebp - decl %edx - jnz Loop2 -Lend2: - movl (%esi),%eax - sbbl %ebx,%eax - movl %eax,(%edi) - - sbbl %eax,%eax - negl %eax - - popl %ebp - popl %ebx - popl %esi - popl %edi - ret - diff --git a/mpi/pentium4/README b/mpi/pentium4/README deleted file mode 100644 
index 215fc7f8..00000000 --- a/mpi/pentium4/README +++ /dev/null @@ -1,115 +0,0 @@ -Copyright 2001 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. - - - - - INTEL PENTIUM-4 MPN SUBROUTINES - - -This directory contains mpn functions optimized for Intel Pentium-4. - -The mmx subdirectory has routines using MMX instructions, the sse2 -subdirectory has routines using SSE2 instructions. All P4s have these, the -separate directories are just so configure can omit that code if the -assembler doesn't support it. - - -STATUS - - cycles/limb - - mpn_add_n/sub_n 4 normal, 6 in-place - - mpn_mul_1 4 normal, 6 in-place - mpn_addmul_1 6 - mpn_submul_1 7 - - mpn_mul_basecase 6 cycles/crossproduct (approx) - - mpn_sqr_basecase 3.5 cycles/crossproduct (approx) - or 7.0 cycles/triangleproduct (approx) - - mpn_l/rshift 1.75 - - - -The shifts ought to be able to go at 1.5 c/l, but not much effort has been -applied to them yet. - -In-place operations, and all addmul, submul, mul_basecase and sqr_basecase -calls, suffer from pipeline anomalies associated with write combining and -movd reads and writes to the same or nearby locations. The movq -instructions do not trigger the same hardware problems. 
Unfortunately, -using movq and splitting/combining seems to require too many extra -instructions to help. Perhaps future chip steppings will be better. - - - -NOTES - -The Pentium-4 pipeline "Netburst", provides for quite a number of surprises. -Many traditional x86 instructions run very slowly, requiring use of -alterative instructions for acceptable performance. - -adcl and sbbl are quite slow at 8 cycles for reg->reg. paddq of 32-bits -within a 64-bit mmx register seems better, though the combination -paddq/psrlq when propagating a carry is still a 4 cycle latency. - -incl and decl should be avoided, instead use add $1 and sub $1. Apparently -the carry flag is not separately renamed, so incl and decl depend on all -previous flags-setting instructions. - -shll and shrl have a 4 cycle latency, or 8 times the latency of the fastest -integer instructions (addl, subl, orl, andl, and some more). shldl and -shrdl seem to have 13 and 15 cycles latency, respectively. Bizarre. - -movq mmx -> mmx does have 6 cycle latency, as noted in the documentation. -pxor/por or similar combination at 2 cycles latency can be used instead. -The movq however executes in the float unit, thereby saving MMX execution -resources. With the right juggling, data moves shouldn't be on a dependent -chain. - -L1 is write-through, but the write-combining sounds like it does enough to -not require explicit destination prefetching. - -xmm registers so far haven't found a use, but not much effort has been -expended. A configure test for whether the operating system knows -fxsave/fxrestor will be needed if they're used. - - - -REFERENCES - -Intel Pentium-4 processor manuals, - - http://developer.intel.com/design/pentium4/manuals - -"Intel Pentium 4 Processor Optimization Reference Manual", Intel, 2001, -order number 248966. 
Available on-line: - - http://developer.intel.com/design/pentium4/manuals/248966.htm - - - ----------------- -Local variables: -mode: text -fill-column: 76 -End: diff --git a/mpi/pentium4/distfiles b/mpi/pentium4/distfiles deleted file mode 100644 index b419f85a..00000000 --- a/mpi/pentium4/distfiles +++ /dev/null @@ -1,3 +0,0 @@ -README - - diff --git a/mpi/pentium4/mmx/distfiles b/mpi/pentium4/mmx/distfiles deleted file mode 100644 index 8f0ea426..00000000 --- a/mpi/pentium4/mmx/distfiles +++ /dev/null @@ -1,2 +0,0 @@ -mpih-lshift.S -mpih-rshift.S diff --git a/mpi/pentium4/mmx/mpih-lshift.S b/mpi/pentium4/mmx/mpih-lshift.S deleted file mode 100644 index e2dd184b..00000000 --- a/mpi/pentium4/mmx/mpih-lshift.S +++ /dev/null @@ -1,457 +0,0 @@ -/* Intel Pentium-4 mpn_lshift -- left shift. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) - * mpi_ptr_t up, (sp + 8) - * mpi_size_t usize, (sp + 12) - * unsigned cnt) (sp + 16) - * - * P4 Willamette, Northwood: 1.75 cycles/limb - * P4 Prescott: 2.0 cycles/limb - */ - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_lshift) -C_SYMBOL_NAME(_gcry_mpih_lshift:) - - - pushl %ebx - pushl %edi - - - movl 20(%esp), %eax - movl 12(%esp), %edx - - movl 16(%esp), %ebx - movl 24(%esp), %ecx - - cmp $5, %eax - jae .Lunroll - - movl -4(%ebx,%eax,4), %edi - decl %eax - - jnz .Lsimple - - shldl %cl, %edi, %eax - - shll %cl, %edi - - movl %edi, (%edx) - popl %edi - - popl %ebx - - ret - - - - - -.Lsimple: - - - - - - - - - - movd (%ebx,%eax,4), %mm5 - - movd %ecx, %mm6 - negl %ecx - - psllq %mm6, %mm5 - addl $32, %ecx - - movd %ecx, %mm7 - psrlq $32, %mm5 - - -.Lsimple_top: - - - - - - - - - - - - - movq -4(%ebx,%eax,4), %mm0 - decl %eax - - psrlq %mm7, %mm0 - - - - movd %mm0, 4(%edx,%eax,4) - jnz .Lsimple_top - - - movd (%ebx), %mm0 - - movd %mm5, %eax - psllq %mm6, %mm0 - - popl %edi - popl %ebx - - movd %mm0, (%edx) - - emms - - ret - - - - - - .align 8, 0x90 -.Lunroll: - - - - - - - - - - movd -4(%ebx,%eax,4), %mm5 - leal (%ebx,%eax,4), %edi - - movd %ecx, %mm6 - andl $4, %edi - - psllq %mm6, %mm5 - jz .Lstart_src_aligned - - - - - - - - - - - - - - - - - - - - movq -8(%ebx,%eax,4), %mm0 - - psllq %mm6, %mm0 - decl %eax - - psrlq $32, %mm0 - - - - movd %mm0, (%edx,%eax,4) -.Lstart_src_aligned: - - movq -8(%ebx,%eax,4), %mm1 - leal (%edx,%eax,4), %edi - - andl $4, %edi - psrlq $32, %mm5 - - movq -16(%ebx,%eax,4), %mm3 - jz .Lstart_dst_aligned - - - - - - - - - - - - - - - - - - - - - movq %mm1, %mm0 - addl $32, %ecx - - psllq %mm6, %mm0 - - movd %ecx, %mm6 - psrlq $32, %mm0 - - - - movd %mm0, -4(%edx,%eax,4) - subl $4, %edx -.Lstart_dst_aligned: - - - psllq %mm6, %mm1 - negl %ecx - - addl $64, %ecx - movq %mm3, %mm2 - - 
movd %ecx, %mm7 - subl $8, %eax - - psrlq %mm7, %mm3 - - por %mm1, %mm3 - jc .Lfinish - - - - - .align 8, 0x90 -.Lunroll_loop: - - - - - - - - - - - - - - - - - movq 8(%ebx,%eax,4), %mm0 - psllq %mm6, %mm2 - - movq %mm0, %mm1 - psrlq %mm7, %mm0 - - movq %mm3, 24(%edx,%eax,4) - por %mm2, %mm0 - - movq (%ebx,%eax,4), %mm3 - psllq %mm6, %mm1 - - movq %mm0, 16(%edx,%eax,4) - movq %mm3, %mm2 - - psrlq %mm7, %mm3 - subl $4, %eax - - por %mm1, %mm3 - jnc .Lunroll_loop - - - -.Lfinish: - - - testb $2, %al - - jz .Lfinish_no_two - - movq 8(%ebx,%eax,4), %mm0 - psllq %mm6, %mm2 - - movq %mm0, %mm1 - psrlq %mm7, %mm0 - - movq %mm3, 24(%edx,%eax,4) - por %mm2, %mm0 - - movq %mm1, %mm2 - movq %mm0, %mm3 - - subl $2, %eax -.Lfinish_no_two: - - - - - - - - testb $1, %al - movd %mm5, %eax - - popl %edi - jz .Lfinish_zero - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - movd (%ebx), %mm0 - psllq %mm6, %mm2 - - movq %mm3, 12(%edx) - psllq $32, %mm0 - - movq %mm0, %mm1 - psrlq %mm7, %mm0 - - por %mm2, %mm0 - psllq %mm6, %mm1 - - movq %mm0, 4(%edx) - psrlq $32, %mm1 - - andl $32, %ecx - popl %ebx - - jz .Lfinish_one_unaligned - - movd %mm1, (%edx) -.Lfinish_one_unaligned: - - emms - - ret - - - - -.Lfinish_zero: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - movq %mm3, 8(%edx) - andl $32, %ecx - - psllq %mm6, %mm2 - jz .Lfinish_zero_unaligned - - movq %mm2, (%edx) -.Lfinish_zero_unaligned: - - psrlq $32, %mm2 - popl %ebx - - movd %mm5, %eax - - movd %mm2, 4(%edx) - - emms - - ret diff --git a/mpi/pentium4/mmx/mpih-rshift.S b/mpi/pentium4/mmx/mpih-rshift.S deleted file mode 100644 index e3374e3b..00000000 --- a/mpi/pentium4/mmx/mpih-rshift.S +++ /dev/null @@ -1,453 +0,0 @@ -/* Intel Pentium-4 mpn_rshift -- right shift. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. 
- * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) - * mpi_ptr_t up, (sp + 8) - * mpi_size_t usize, (sp + 12) - * unsigned cnt) (sp + 16) - * - * P4 Willamette, Northwood: 1.75 cycles/limb - * P4 Prescott: 2.0 cycles/limb - */ - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_rshift) -C_SYMBOL_NAME(_gcry_mpih_rshift:) - pushl %ebx - pushl %edi - - - movl 20(%esp), %eax - movl 12(%esp), %edx - - movl 16(%esp), %ebx - movl 24(%esp), %ecx - - cmp $5, %eax - jae .Lunroll - - decl %eax - movl (%ebx), %edi - - jnz .Lsimple - - shrdl %cl, %edi, %eax - - shrl %cl, %edi - - movl %edi, (%edx) - popl %edi - - popl %ebx - - ret - - - - - - .align 8, 0x90 -.Lsimple: - - - - - - - - - - movd (%ebx), %mm5 - leal (%ebx,%eax,4), %ebx - - movd %ecx, %mm6 - leal -4(%edx,%eax,4), %edx - - psllq $32, %mm5 - negl %eax - - - - - - - -.Lsimple_top: - - - - - - - - - - movq (%ebx,%eax,4), %mm0 - incl %eax - - psrlq %mm6, %mm0 - - movd %mm0, (%edx,%eax,4) - jnz .Lsimple_top - - - movd (%ebx), %mm0 - psrlq %mm6, %mm5 - - psrlq %mm6, %mm0 - popl %edi - - movd %mm5, %eax - popl %ebx - - movd %mm0, 4(%edx) - - emms - - ret - - - - - - .align 8, 0x90 -.Lunroll: - - - - - - - - - - movd (%ebx), %mm5 - movl $4, %edi - - movd %ecx, %mm6 - testl %edi, %ebx - - psllq $32, %mm5 - jz .Lstart_src_aligned - - - - - - - - - - - - - - - - - movq (%ebx), %mm0 - - psrlq %mm6, %mm0 - addl $4, %ebx - - decl %eax - - movd %mm0, (%edx) - addl $4, %edx -.Lstart_src_aligned: - - - movq (%ebx), %mm1 - testl %edi, %edx - - psrlq %mm6, %mm5 - jz .Lstart_dst_aligned - - - - - - - - - - - - - - - - - - movq %mm1, %mm0 - addl $32, %ecx - - psrlq %mm6, %mm0 - - movd %ecx, %mm6 - - movd %mm0, (%edx) - addl $4, %edx -.Lstart_dst_aligned: - - - movq 8(%ebx), %mm3 - negl %ecx - - movq %mm3, %mm2 - addl $64, %ecx - - movd %ecx, %mm7 - psrlq %mm6, %mm1 - - leal -12(%ebx,%eax,4), %ebx - leal -20(%edx,%eax,4), %edx - - psllq %mm7, 
%mm3 - subl $7, %eax - - por %mm1, %mm3 - negl %eax - - jns .Lfinish - - - - - - - - - - - - - - - - .align 8, 0x90 -.Lunroll_loop: - - - - - - - - - - - - - - - - - movq (%ebx,%eax,4), %mm0 - psrlq %mm6, %mm2 - - movq %mm0, %mm1 - psllq %mm7, %mm0 - - movq %mm3, -8(%edx,%eax,4) - por %mm2, %mm0 - - movq 8(%ebx,%eax,4), %mm3 - psrlq %mm6, %mm1 - - movq %mm0, (%edx,%eax,4) - movq %mm3, %mm2 - - psllq %mm7, %mm3 - addl $4, %eax - - por %mm1, %mm3 - js .Lunroll_loop - - -.Lfinish: - - - testb $2, %al - - jnz .Lfinish_no_two - - movq (%ebx,%eax,4), %mm0 - psrlq %mm6, %mm2 - - movq %mm0, %mm1 - psllq %mm7, %mm0 - - movq %mm3, -8(%edx,%eax,4) - por %mm2, %mm0 - - movq %mm1, %mm2 - movq %mm0, %mm3 - - addl $2, %eax -.Lfinish_no_two: - - - - - - - - testb $1, %al - popl %edi - - movd %mm5, %eax - jnz .Lfinish_zero - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - movd 8(%ebx), %mm0 - psrlq %mm6, %mm2 - - movq %mm0, %mm1 - psllq %mm7, %mm0 - - movq %mm3, (%edx) - por %mm2, %mm0 - - psrlq %mm6, %mm1 - andl $32, %ecx - - popl %ebx - jz .Lfinish_one_unaligned - - - movd %mm1, 16(%edx) -.Lfinish_one_unaligned: - - movq %mm0, 8(%edx) - - emms - - ret - - - - -.Lfinish_zero: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - movq %mm3, 4(%edx) - psrlq %mm6, %mm2 - - movd %mm2, 12(%edx) - andl $32, %ecx - - popl %ebx - jz .Lfinish_zero_unaligned - - movq %mm2, 12(%edx) -.Lfinish_zero_unaligned: - - emms - - ret diff --git a/mpi/pentium4/sse2/distfiles b/mpi/pentium4/sse2/distfiles deleted file mode 100644 index 7252cd7e..00000000 --- a/mpi/pentium4/sse2/distfiles +++ /dev/null @@ -1,5 +0,0 @@ -mpih-add1.S -mpih-mul1.S -mpih-mul2.S -mpih-mul3.S -mpih-sub1.S diff --git a/mpi/pentium4/sse2/mpih-add1.S b/mpi/pentium4/sse2/mpih-add1.S deleted file mode 100644 index 55ed6630..00000000 --- a/mpi/pentium4/sse2/mpih-add1.S +++ /dev/null @@ -1,91 +0,0 @@ -/* Intel Pentium-4 mpn_add_n -- mpn addition. 
- * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - - /******************* - * mpi_limb_t - * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - * - * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 - * 6.0 cycles/limb if dst==src1 or dst==src2 - * P4 Prescott: >= 5 cycles/limb - * - * The 4 c/l achieved here isn't particularly good, but is better than 9 c/l - * for a basic adc loop. 
- */ - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_add_n) -C_SYMBOL_NAME(_gcry_mpih_add_n:) - - pxor %mm0, %mm0 - - movl 8(%esp), %eax /* s1_ptr */ - movl %ebx, 8(%esp) /* re-use parameter space */ - movl 12(%esp), %ebx /* res_ptr */ - movl 4(%esp), %edx /* s2_ptr */ - movl 16(%esp), %ecx /* size */ - - leal (%eax,%ecx,4), %eax /* src1 end */ - leal (%ebx,%ecx,4), %ebx /* src2 end */ - leal (%edx,%ecx,4), %edx /* dst end */ - negl %ecx /* -size */ - -Ltop: -/* - C eax src1 end - C ebx src2 end - C ecx counter, limbs, negative - C edx dst end - C mm0 carry bit -*/ - - movd (%eax,%ecx,4), %mm1 - movd (%ebx,%ecx,4), %mm2 - paddq %mm2, %mm1 - - paddq %mm1, %mm0 - movd %mm0, (%edx,%ecx,4) - - psrlq $32, %mm0 - - addl $1, %ecx - jnz Ltop - - - movd %mm0, %eax - movl 8(%esp), %ebx /* restore saved EBX */ - emms - ret diff --git a/mpi/pentium4/sse2/mpih-mul1.S b/mpi/pentium4/sse2/mpih-mul1.S deleted file mode 100644 index a0c98fb4..00000000 --- a/mpi/pentium4/sse2/mpih-mul1.S +++ /dev/null @@ -1,96 +0,0 @@ -/* Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store - * the result in a second limb vector. - * - * Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * src != dst src == dst - * P6 model 9 (Banias) ?.? - * P6 model 13 (Dothan) 4.75 4.75 - * P4 model 0 (Willamette) 4.0 6.0 - * P4 model 1 (?) 4.0 6.0 - * P4 model 2 (Northwood) 4.0 6.0 - * P4 model 3 (Prescott) ?.? ?.? - * P4 model 4 (Nocona) ?.? ?.? - * Unfortunately when src==dst the write-combining described in - * pentium4/README takes us up to 6 c/l. 
- * - */ - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) -C_SYMBOL_NAME(_gcry_mpih_mul_1:); - - pxor %mm0, %mm0 - -.Lstart_1c: - movl 8(%esp), %eax - movd 16(%esp), %mm7 - movl 4(%esp), %edx - movl 12(%esp), %ecx - -.Ltop: - -/* - C eax src, incrementing - C ebx - C ecx counter, size iterations - C edx dst, incrementing - C - C mm0 carry limb - C mm7 multiplier -*/ - - movd (%eax), %mm1 - addl $4, %eax - pmuludq %mm7, %mm1 - - paddq %mm1, %mm0 - movd %mm0, (%edx) - addl $4, %edx - - psrlq $32, %mm0 - - subl $1, %ecx - jnz .Ltop - - - movd %mm0, %eax - emms - ret - diff --git a/mpi/pentium4/sse2/mpih-mul2.S b/mpi/pentium4/sse2/mpih-mul2.S deleted file mode 100644 index f975adfc..00000000 --- a/mpi/pentium4/sse2/mpih-mul2.S +++ /dev/null @@ -1,136 +0,0 @@ -/* Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add - * the result to a second limb vector. - * - * Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. 
- * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * P3 model 9 (Banias) ?.? - * P3 model 13 (Dothan) 5.8 - * P4 model 0 (Willamette) 5.5 - * P4 model 1 (?) 5.5 - * P4 model 2 (Northwood) 5.5 - * P4 model 3 (Prescott) 6.0 - * P4 model 4 (Nocona) - * - * Only the carry limb propagation is on the dependent chain, but some other - * Pentium4 pipeline magic brings down performance to 6 cycles/l from the - * ideal 4 cycles/l. - */ - - - TEXT - ALIGN (4) - GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) -C_SYMBOL_NAME(_gcry_mpih_addmul_1:) - - pxor %mm4, %mm4 -.Lstart_1c: - movl 8(%esp), %eax - movl 12(%esp), %ecx - movl 4(%esp), %edx - movd 16(%esp), %mm7 - -/* - C eax src, incrementing ; 5B - C ecx loop counter, decrementing - C edx dst, incrementing - C - C mm4 carry, low 32-bits - C mm7 multiplier -*/ - - movd (%eax), %mm2 - pmuludq %mm7, %mm2 - - shrl $1, %ecx - jnc .Leven - - leal 4(%eax), %eax - movd (%edx), %mm1 - paddq %mm2, %mm1 - paddq %mm1, %mm4 - movd %mm4, (%edx) - psrlq $32, %mm4 - - testl %ecx, %ecx - jz .Lrtn - leal 4(%edx), %edx - - movd (%eax), %mm2 - pmuludq %mm7, %mm2 -.Leven: - movd 4(%eax), %mm0 - movd (%edx), %mm1 - pmuludq %mm7, %mm0 - - subl $1, %ecx - jz .Lend -.Lloop: - paddq %mm2, %mm1 - movd 8(%eax), %mm2 - paddq %mm1, %mm4 - movd 4(%edx), %mm3 - pmuludq %mm7, %mm2 - movd %mm4, (%edx) - psrlq $32, %mm4 - - paddq %mm0, %mm3 - movd 12(%eax), %mm0 - paddq %mm3, %mm4 - movd 8(%edx), %mm1 - pmuludq %mm7, %mm0 - movd %mm4, 4(%edx) - psrlq $32, %mm4 - - leal 8(%eax), %eax - leal 8(%edx), %edx - subl $1, %ecx - 
jnz .Lloop -.Lend: - paddq %mm2, %mm1 - paddq %mm1, %mm4 - movd 4(%edx), %mm3 - movd %mm4, (%edx) - psrlq $32, %mm4 - paddq %mm0, %mm3 - paddq %mm3, %mm4 - movd %mm4, 4(%edx) - psrlq $32, %mm4 -.Lrtn: - movd %mm4, %eax - emms - ret diff --git a/mpi/pentium4/sse2/mpih-mul3.S b/mpi/pentium4/sse2/mpih-mul3.S deleted file mode 100644 index ebcd2a68..00000000 --- a/mpi/pentium4/sse2/mpih-mul3.S +++ /dev/null @@ -1,127 +0,0 @@ -/* Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and - * subtract the result from a second limb vector. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon - * (stepping 10). - * - * This code is not particularly good at 7 c/l. The dependent chain is only - * 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that - * speed isn't achieved. - * - * The arrangements made here to get a two instruction dependent chain are - * slightly subtle. In the loop the carry (or borrow rather) is a negative - * so that a paddq can be used to give a low limb ready to store, and a high - * limb ready to become the new carry after a psrlq. - * - * If the carry was a simple twos complement negative then the psrlq shift - * would need to bring in 0 bits or 1 bits according to whether the high was - * zero or non-zero, since a non-zero value would represent a negative - * needing sign extension. That wouldn't be particularly easy to arrange and - * certainly would add an instruction to the dependent chain, so instead an - * offset is applied so that the high limb will be 0xFFFFFFFF+c. With c in - * the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to - * 0xFFFFFFFF and is therefore always positive and can always have 0 bits - * shifted in, which is what psrlq does. - * - * The extra 0xFFFFFFFF must be subtracted before c is used, but that can be - * done off the dependent chain. The total adjustment then is to add - * 0xFFFFFFFF00000000 to offset the new carry, and subtract - * 0x00000000FFFFFFFF to remove the offset from the current carry, for a net - * add of 0xFFFFFFFE00000001. In the code this is applied to the destination - * limb when fetched. 
- * - * It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement - * negative, which is how it's undone for the return value, but that doesn't - * seem as clear. -*/ - - TEXT - ALIGN (4) - GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) -C_SYMBOL_NAME(_gcry_mpih_submul_1:) - - pxor %mm1, %mm1 - -.Lstart_1c: - movl 8(%esp), %eax - pcmpeqd %mm0, %mm0 - - movd 16(%esp), %mm7 - pcmpeqd %mm6, %mm6 - - movl 4(%esp), %edx - psrlq $32, %mm0 - - movl 12(%esp), %ecx - psllq $32, %mm6 - - psubq %mm0, %mm6 - - psubq %mm1, %mm0 - -/* - C eax src, incrementing - C ebx - C ecx loop counter, decrementing - C edx dst, incrementing - C - C mm0 0xFFFFFFFF - borrow - C mm6 0xFFFFFFFE00000001 - C mm7 multiplier -*/ - -.Lloop: - movd (%eax), %mm1 - leal 4(%eax), %eax - movd (%edx), %mm2 - paddq %mm6, %mm2 - pmuludq %mm7, %mm1 - psubq %mm1, %mm2 - paddq %mm2, %mm0 - subl $1, %ecx - movd %mm0, (%edx) - psrlq $32, %mm0 - leal 4(%edx), %edx - jnz .Lloop - - movd %mm0, %eax - notl %eax - emms - ret diff --git a/mpi/pentium4/sse2/mpih-sub1.S b/mpi/pentium4/sse2/mpih-sub1.S deleted file mode 100644 index 33900c74..00000000 --- a/mpi/pentium4/sse2/mpih-sub1.S +++ /dev/null @@ -1,112 +0,0 @@ -/* Intel Pentium-4 mpn_sub_n -- mpn subtraction. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - * - * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 - * 6.0 cycles/limb if dst==src1 or dst==src2 - * P4 Prescott: >= 5 cycles/limb - * - * The main loop code is 2x unrolled so that the carry bit can alternate - * between mm0 and mm1. - */ - - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) -C_SYMBOL_NAME(_gcry_mpih_sub_n:) - - pxor %mm0, %mm0 -.Lstart_nc: - movl 8(%esp), %eax - movl %ebx, 8(%esp) - movl 12(%esp), %ebx - movl 4(%esp), %edx - movl 16(%esp), %ecx - - leal (%eax,%ecx,4), %eax - leal (%ebx,%ecx,4), %ebx - leal (%edx,%ecx,4), %edx - negl %ecx - -.Ltop: -/* - C eax src1 end - C ebx src2 end - C ecx counter, limbs, negative - C edx dst end - C mm0 carry bit -*/ - - movd (%eax,%ecx,4), %mm1 - movd (%ebx,%ecx,4), %mm2 - psubq %mm2, %mm1 - - psubq %mm0, %mm1 - movd %mm1, (%edx,%ecx,4) - - psrlq $63, %mm1 - - addl $1, %ecx - jz .Ldone_mm1 - - movd (%eax,%ecx,4), %mm0 - movd (%ebx,%ecx,4), %mm2 - psubq %mm2, %mm0 - - psubq %mm1, %mm0 - movd %mm0, (%edx,%ecx,4) - - psrlq $63, %mm0 - - addl $1, %ecx - jnz .Ltop - - - movd %mm0, %eax - movl 8(%esp), %ebx - emms - ret - - - -.Ldone_mm1: - movd %mm1, %eax - movl 8(%esp), %ebx - emms - ret