diff --git a/build-aux/db2any b/build-aux/db2any
index a240aaa0..054479b3 100755
--- a/build-aux/db2any
+++ b/build-aux/db2any
@@ -1,489 +1,489 @@
 #!/bin/sh
 # db2any - Docbook to html/ps/info rendering
 #
 #	Copyright (C) 2000, 2001 Free Software Foundation, Inc
 #
 # This is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# SPDX-License-Identifier: GPL-2-or-later
 #
 # Note: This requires a Posix shell
 #
 # $Id$
 
 pgm="db2any"
 version="0.7.3"
 
 # Print the one-line usage summary on stderr and terminate the script
 # with exit status 1.
 usage () {
     printf '%s\n' 'usage: db2any [--help] [options] filename' >&2
     exit 1
 }
 
 # Print program name, version and the short copyright/warranty notice on
 # stdout, followed by one empty line.  Reads $pgm and $version.
 show_banner () {
     printf '%s\n' \
         "$pgm $version" \
         'Copyright (C) 2001 Free Software Foundation, Inc.' \
         'This is free software; see the source for copying conditions.  There is NO' \
         'warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.' \
         ''
 }
 
 # Print the banner followed by the option summary on stdout, then
 # terminate the script with exit status 0.  Reads $all_modes.
 show_help () {
     show_banner
     printf '%s\n' \
         'usage: db2any [options] docbookfile' \
         '' \
         'Options:' \
         "    --mode    select the mode (one of: ${all_modes})" \
         '    --help' \
         '    --nosplit' \
         '    --copyfiles' \
         '    --systemcheck' \
         '    --verbose' \
         '    --draft' \
         '    --manvolume n'
     # option name and its argument are separated by a TAB in this line
     printf '    --usestyle\tfile\n'
     printf '%s\n' \
         '    --ignore-xref      [not yet implemented]' \
         ''
     exit 0
 }
 
 # a list of all possible stylesheet locations
 # (whitespace separated; do_systemcheck probes each entry for
 # print/docbook.dsl and html/docbook.dsl and keeps the first hit)
 stylesheet_dirs='
 /usr/local/lib/dsssl/stylesheets/docbook
 /usr/local/share/dsssl/stylesheets/docbook
 /usr/local/lib/sgml/stylesheet/dsssl/docbook/nwalsh
 /usr/local/share/sgml/stylesheet/dsssl/docbook/nwalsh
 /usr/lib/dsssl/stylesheets/docbook
 /usr/share/dsssl/stylesheets/docbook
 /usr/lib/sgml/stylesheet/dsssl/docbook/nwalsh
 /usr/share/sgml/stylesheet/dsssl/docbook/nwalsh
 /usr/lib/sgml/stylesheets/nwalsh-modular
 /usr/share/sgml/stylesheets/nwalsh-modular
 '
 
 # Defaults for the settings the option parser below may override.
 # all_modes is only used in help and error messages.
 all_modes="check tex html man texinfo"
 # the docbook file to process (the single non-option argument)
 input=
 verbose=no
 draft=no
 nosplit=no
 copyfiles=no
 systemcheck_only=no
 # "none" means that no --mode option was given
 mode=none
 # suffix appended to the output file name in man mode
 manvolume="man"
 # optional local stylesheet given with --usestyle
 usestyle=""
 # number of non-option arguments required; --systemcheck sets it to 0
 expected_args=1
 # recorded by --ignore-xref, which the help text marks as not implemented
 ignore_xref=no
 # Parse the command line options.  Parsing stops at "--" or at the first
 # argument that does not start with "-"; the remaining arguments are left
 # in "$@" for the checks that follow.
 #
 # The first character is tested with a case pattern.  Piping "$1" through
 # echo and "head -c1" is unreliable: echo may swallow arguments such as
 # "-n" or "-e", and "head -c" is not specified by POSIX.
 while [ $# -gt 0 ]; do
     case "$1" in
       --version)
         echo "$pgm $version"
         exit 0
         ;;
       --help|-h|-help)
         show_help
         exit 0
         ;;
       --nosplit)
         nosplit=yes
         ;;
       --copyfiles)
         copyfiles=yes
         ;;
       --systemcheck)
         # a pure system check takes no input file
         systemcheck_only=yes
         expected_args=0
         ;;
       --mode)
         shift
         if [ $# = 0 ]; then
            echo "$pgm: missing argument for --mode" >&2
            exit 1
         fi
         mode="$1"
         ;;
       --manvolume)
         shift
         if [ $# = 0 ]; then
            echo "$pgm: missing argument for --manvolume" >&2
            exit 1
         fi
         manvolume="$1"
         ;;
       --usestyle)
         shift
         if [ $# = 0 ]; then
            echo "$pgm: missing argument for --usestyle" >&2
            exit 1
         fi
         usestyle="$1"
         ;;
       --verbose)
         verbose=yes
         ;;
       --draft)
         draft=yes
         ;;
       --ignore-xref)
         ignore_xref=yes
         ;;
       --)
         shift
         break
         ;;
       -*)
         echo "$pgm: invalid option $1" >&2
         exit 1
         ;;
       *)
         # first non-option argument: leave it in "$@"
         break
         ;;
     esac
     shift
 done
 
 # Exactly $expected_args non-option arguments must be left over; the
 # single one, if any, is the input file.
 if [ $# = "$expected_args" ]; then
     if [ $# = 1 ]; then
        input="$1"
     fi
 else
    usage
 fi
 
 # check the mode and map the accepted spellings to the canonical name
 case "$mode" in
     html|HTML)
         mode=html
         ;;
     tex|TEX|TeX|dvi)
         mode=dvi
         ;;
     man|MAN)
         mode=man
         ;;
     texi|TEXI|texinfo|TEXINFO)
         mode=texinfo
         ;;
     check)
         ;;
     none)
         # a missing mode is only acceptable for --systemcheck
         if [ "$systemcheck_only" != yes ]; then
             echo "$pgm: no mode specified; use one of: ${all_modes}" >&2
             exit 1
         fi
         ;;
     *)
         echo "$pgm: invalid mode $mode" >&2
         echo "$pgm: valid modes are: ${all_modes}"  >&2
         exit 1
         ;;
 esac
 
 # A local stylesheet, if requested, must be an existing regular file.
 # The expansions are quoted so that an empty value or a name containing
 # whitespace is tested as one operand.
 if [ -n "$usestyle" ] && [ ! -f "$usestyle" ]; then
    echo "$pgm: cannot access local stylesheet" >&2
    exit 1
 fi
 
 #######################################
 #   Options are all parsed here       #
 #######################################
 
 # check whether the given program is available in the path
 #
 # Arguments: $1 - bare program name (without directory part)
 # Returns:   0 if a directory listed in $PATH contains an executable
 #            regular file of that name, 1 otherwise
 #
 # The body is a subshell, so the IFS and globbing changes needed to
 # split $PATH cannot leak into the caller, even if IFS was unset.
 check_prog () (
     IFS=":"
     set -f
     for dir in $PATH; do
         # an empty PATH component stands for the current directory
         test -z "$dir" && dir=.
         if test -f "$dir/$1" && test -x "$dir/$1"; then
             exit 0
         fi
     done
     exit 1
 )
 
 
 # Find out what we need to know about the system we are running on and
 # where the Docbook tools are installed.  Each finding is reported on
 # stderr when running in verbose mode.
 #
 # Sets jade_version, tex_stylesheet and html_stylesheet.  Terminates the
 # script with status 1 as soon as a required tool or stylesheet is
 # missing.
 do_systemcheck () {
     # look for Jade
     jade_version=$(jade -v </dev/null 2>&1 |
                    sed -n 's/.*:I:.*Jade version "\([0-9.]*\)"/\1/p')
     if test -z "$jade_version"; then
         echo "$pgm: error: jade not found" >&2
         exit 1
     fi
     if test "$verbose" = yes; then
         echo "$pgm: Jade version $jade_version found" >&2
     fi
 
     # look for JadeTeX
     jadetex -v 2>/dev/null | grep -q '^TeX' || {
         echo "$pgm: error: jadetex not found" >&2
         exit 1
     }
 
     # look for the docbook-to-man script. Fixme: we should check that
     # it is a recent version.
     docbook-to-man 2>&1 | grep -q '^usage' || {
         echo "$pgm: error: docbook-to-man not found" >&2
         exit 1
     }
     if test "$verbose" = yes; then
         echo "$pgm: docbook-to-man found" >&2
     fi
 
     # look for the docbook-to-texi scripts.
     check_prog docbook2texi || {
         echo "$pgm: error: docbook2texi not found" >&2
         exit 1
     }
     if test "$verbose" = yes; then
         echo "$pgm: docbook2texi found" >&2
     fi
     sgml2xml -v /dev/null 2>&1 | grep 'SP version' >/dev/null 2>&1 || {
         echo "$pgm: error: sgml2xml not found" >&2
         exit 1
     }
     if test "$verbose" = yes; then
         echo "$pgm: sgml2xml found" >&2
     fi
 
     # figure out where our stylesheets are: for each kind the first
     # directory of the list that has the file wins
     tex_stylesheet=none
     html_stylesheet=none
     for d in ${stylesheet_dirs}; do
         if test "$tex_stylesheet" = none && test -f "$d/print/docbook.dsl"; then
             tex_stylesheet=$d/print/docbook.dsl
         fi
         if test "$html_stylesheet" = none && test -f "$d/html/docbook.dsl"; then
             html_stylesheet=$d/html/docbook.dsl
         fi
     done
     if test "$verbose" = yes; then
         echo "$pgm: TeX stylesheet: ${tex_stylesheet}" >&2
         echo "$pgm: HTML stylesheet: ${html_stylesheet}" >&2
     fi
     if test "$tex_stylesheet" = none || test "$html_stylesheet" = none; then
         echo "$pgm: error: stylesheets not found" >&2
         exit 1
     fi
 }
 
 
 # Render the docbook as HTML
 #
 # Globals read:    input, usestyle, nosplit, copyfiles, verbose, doctype,
 #                  html_stylesheet
 # Globals written: output, tmpstyle, outputdir, inp, mainfile
 #
 # With nosplit=yes a single file <name>.html is written to the current
 # directory.  Otherwise the pages are created in html/<name>/ and
 # <name>.html becomes a small index page linking to the first page whose
 # name starts with the document type.
 # Exits with status 1 if an output directory cannot be created.
 render_html () {
     docname=$(basename "$input" | sed 's/\.sgml$//')
     output="$docname.html"
 
     if [ -n "$usestyle" ]; then
         # Absolute name: in split mode jade runs inside the output directory.
         tmpstyle="$(pwd)/$(basename "$usestyle")-html.tmp"
         if [ ! -f "$tmpstyle" ] || [ "$usestyle" -nt "$tmpstyle" ]; then
             sed "s%@DOCBOOK_DSL@%$html_stylesheet%" "$usestyle" > "$tmpstyle"
         fi
     else
         # HTML output needs the HTML stylesheet, not the print/TeX one.
         tmpstyle="$html_stylesheet"
     fi
 
     # --nosplit creates just one HTML file
     if [ "$nosplit" = yes ]; then
         echo "running jade on '$input' ..." >&2
         jade -D . -d "$tmpstyle" -t sgml -i html -V nochunks "$input" > "$output"
         echo "$output created"
         return 0
     fi
 
     # Make sure that we have a html subdir
     if [ ! -d html ]; then
         if mkdir html; then
             echo "'html' directory created" >&2
         else
             echo "failed to create 'html' directory" >&2
             exit 1
         fi
     fi
 
     outputdir="html/$docname"
     if [ ! -d "$outputdir" ]; then
         if mkdir "$outputdir"; then
             echo "'$outputdir' created" >&2
         else
             echo "failed to create '$outputdir'" >&2
             exit 1
         fi
     fi
     echo "creating html pages in '$outputdir' ..." >&2
 
     # jade is run from inside $outputdir (two levels down), so the input
     # name has to be adjusted unless it is already absolute.
     case "$input" in
         /*)  inp="$input" ;;
         */*) inp="$(pwd)/$input" ;;
         *)   inp="../../$input" ;;
     esac
 
     if [ "$verbose" = yes ]; then
         echo "running jade on '$inp' ..." >&2
     fi
     (cd "$outputdir" && jade -D . -t sgml -i html -d "$tmpstyle" "$inp")
     if [ "$verbose" = yes ]; then
         echo "html version in '$outputdir' created" >&2
     fi
 
     # break out all filerefs and copy them to the outputdirectory
     # fixme: handling of path components is wrong
     if [ "$copyfiles" = yes ]; then
         echo "looking for filerefs ..." >&2
         # the list is split on whitespace: one fileref per word
         for file in $(nsgmls -i html "$input" \
                         | awk '/^AFILEREF[ \t]+CDATA/ {print $3}'); do
             d="$outputdir/$(basename "$file")"
             if cat "$file" > "$d"; then
                 echo "  $file -> $d" >&2
             fi
         done
     fi
 
     # first generated page whose name starts with the document type
     mainfile=
     for candidate in "$outputdir/$doctype"*; do
         if [ -e "$candidate" ]; then
             mainfile=$candidate
             break
         fi
     done
 
     # create a html index file for it, so that we can more easily
     # find the rendered pages
     printf '%s\n' \
         "<html><title>$output</title>" \
         '<body>' \
         '' \
         "<a href=\"$mainfile\">$mainfile</a>" \
         '' \
         '</body>' \
         '</html>' > "$output"
 
     if [ "$verbose" = yes ]; then
         echo "$output created with link to '$mainfile'" >&2
     fi
 }
 
 
 # Run JadeTeX once.
 # This function expects the source file in $texfile and
 # the name of the logfile in $logfile.
 # A stale logfile is removed first; the run counts as successful only if
 # the tail of the new logfile reports "Output written on".  Otherwise the
 # script terminates with status 1.
 run_jadetex () {
     rm -f "$logfile"
     jadetex "$texfile"
     if ! tail "$logfile" | grep -q '^Output written on'; then
         echo "JadeTeX failed" >&2
         exit 1
     fi
 }
 
 # Render the docbook to DVI
 #
 # Globals read:    input, usestyle, draft, verbose, tex_stylesheet
 # Globals written: output, texfile, logfile, auxfile, tmpstyle
 #
 # Jade translates the input to <name>.tex, which JadeTeX then processes.
 # Exits with status 1 if Jade does not produce a complete TeX file.
 render_dvi () {
     docname=$(basename "$input" | sed 's/\.sgml$//')
     output="$docname.dvi"
     texfile="$docname.tex"
     logfile="$docname.log"
     auxfile="$docname.aux"
 
     if [ -n "$usestyle" ]; then
         tmpstyle="$(basename "$usestyle")-tex.tmp"
         if [ ! -f "$tmpstyle" ] || [ "$usestyle" -nt "$tmpstyle" ]; then
             sed "s%@DOCBOOK_DSL@%$tex_stylesheet%" "$usestyle" > "$tmpstyle"
         fi
     else
         tmpstyle="$tex_stylesheet"
     fi
     if [ "$verbose" = yes ]; then
         echo "running jade on '$input' ..." >&2
     fi
     jade -D . -t tex -i tex -d "$tmpstyle" -o "$texfile" "$input"
     # a complete Jade TeX file ends with \endFOT{}
     if ! tail "$texfile" | grep -q '\\endFOT{}'; then
         echo "Jade failed" >&2
         exit 1
     fi
 
     # Better delete the aux file first
     rm -f "$auxfile"
     # The first run won't get the references right, so we have to
     # run it 2 or 3 times.  JadeTeX doesn't indicate whether a third
     # run is required, so we do it always.
     run_jadetex
     if [ "$draft" = no ] \
        && tail -n 100 "$logfile" \
           | grep -q '^LaTeX Warning: There were undefined references'; then
         echo 'running JadeTeX a second and third time' >&2
         run_jadetex
         run_jadetex
     fi
 
     if [ "$verbose" = yes ]; then
         echo "DVI file '$output' created" >&2
     fi
 }
 
 # Render the docbook to troff
 #
 # Runs docbook-to-man on $input and stores its output in
 # <name>.$manvolume in the current directory (sets $output).
 # The file names are quoted so that names with whitespace work.
 render_man () {
     output="$(basename "$input" | sed 's/\.sgml$//').$manvolume"
 
     if [ "$verbose" = yes ]; then
         echo "running docbook-to-man on '$input' ..." >&2
     fi
     docbook-to-man "$input" > "$output"
     if [ "$verbose" = yes ]; then
         echo "man page '$output' created" >&2
     fi
 }
 
 # Render the docbook to texinfo
 #
 # The input is first converted to XML with sgml2xml, then docbook2texi
 # turns that into <name>.texi (sets $output).  The intermediate XML file
 # is removed afterwards.
 # The file names are quoted so that names with whitespace work.
 render_texinfo () {
     output=$(basename "$input" | sed 's/\.sgml$/.texi/')
     tmpxml=$(basename "$input" | sed 's/\.sgml$/.xml/')
 
     if [ "$verbose" = yes ]; then
         echo "running sgml2xml on '$input' ..." >&2
     fi
     sgml2xml -x lower "$input" > "$tmpxml"
     if [ "$verbose" = yes ]; then
         echo "running docbook2texi on '$tmpxml' ..." >&2
     fi
     # only the first "--" of each line is widened to "---"
     docbook2texi "$tmpxml" | sed 's,--,---,' > "$output"
     rm "$tmpxml"
     if [ "$verbose" = yes ]; then
         echo "texinfo '$output' created" >&2
     fi
 }
 
 #######################################
 #	 main function		      #
 #######################################
 
 # Check the tool chain first; with --systemcheck that is all we do.
 do_systemcheck
 if [ "$systemcheck_only" = yes ]; then
     exit 0
 fi
 
 # The input may be given with or without its .sgml suffix.
 if [ ! -f "$input" ]; then
     input="$input.sgml"
     if [ ! -f "$input" ]; then
         echo "$pgm: '$input': no such file" >&2
         exit 1
     fi
 fi
 
 # grep the document type: the second field of the first line that
 # mentions DOCTYPE, converted to lower case
 doctype=$(grep -i '\<doctype' "$input" | awk 'NR==1 {print $2}' \
           | tr '[A-Z]' '[a-z]')
 if test -z "$doctype"; then
     echo "$pgm: error: no DOCTYPE declaration found" >&2
     exit 1
 fi
 if [ "$verbose" = yes ]; then
     echo "$input: DOCTYPE is '$doctype'" >&2
 fi
 
 # dispatch on the mode validated during option parsing
 case "$mode" in
     check)
         # validation only: hand the parser's exit status to the caller
         nsgmls -vs "$input"
         exit $?
         ;;
     html)
         render_html
         ;;
     dvi)
         render_dvi
         ;;
     man)
         render_man
         ;;
     texinfo)
         render_texinfo
         ;;
 esac
 
 
 exit 0
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 353de00b..2e39cd3b 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
@@ -1,216 +1,216 @@
 /* arcfour.c  -  The arcfour stream cipher
  *	Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * For a description of the algorithm, see:
  *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
  *   ISBN 0-471-11709-9. Pages 397 ff.
  */
 
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
 #include "cipher-internal.h"
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 static const char *selftest(void);
 
 #ifdef USE_AMD64_ASM
 
 /* Cipher state used when the AMD64 assembly implementation is enabled.
  * NOTE(review): the 32-bit element and index types presumably match the
  * layout _gcry_arcfour_amd64 expects -- confirm against the assembly.  */
 typedef struct {
     u32 sbox[256];     /* 256-entry table, filled by do_arcfour_setkey */
     u32 idx_i, idx_j;  /* stream indices, reset to 0 by do_arcfour_setkey */
 } ARCFOUR_context;
 
 void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
 			 byte *outdata);
 
 /* Process LENGTH bytes from INBUF into OUTBUF by forwarding to the AMD64
  * assembly routine.  Note the different argument order of that routine:
  * context, length, input, output.  */
 static void
 encrypt_stream (void *context,
                 byte *outbuf, const byte *inbuf, size_t length)
 {
   _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
 }
 
 #else /*!USE_AMD64_ASM*/
 
 /* Cipher state used by the C implementation.  */
 typedef struct {
     byte sbox[256];    /* 256-entry table, filled by do_arcfour_setkey */
     int idx_i, idx_j;  /* stream indices, carried over between calls */
 } ARCFOUR_context;
 
 /* Process LENGTH bytes from INBUF into OUTBUF using the state in CTX.
  * Each output byte is the input byte XORed with one byte taken from the
  * sbox; the two stream indices are written back to CTX at the end so
  * that a later call continues the stream.  */
 static void
 do_encrypt_stream( ARCFOUR_context *ctx,
 		   byte *outbuf, const byte *inbuf, size_t length )
 {
 #ifndef __i386__
   register unsigned int i = ctx->idx_i;
   register byte j = ctx->idx_j;
   register byte *sbox = ctx->sbox;
   register byte t, u;
 
   /* No explicit "& 255" here: the (byte) casts and the byte-typed j and u
    * do the wrap-around (assuming byte is an 8-bit unsigned type -- TODO
    * confirm in types.h).  */
   while ( length-- )
     {
       i++;
       t = sbox[(byte)i];
       j += t;
       u = sbox[j];
       /* swap sbox[i] and sbox[j] using the values already loaded */
       sbox[(byte)i] = u;
       u += t;
       sbox[j] = t;
       *outbuf++ = sbox[u] ^ *inbuf++;
     }
 
   ctx->idx_i = (byte)i;
   ctx->idx_j = (byte)j;
 #else /*__i386__*/
   /* Old implementation of arcfour is faster on i386 than the version above.
    * This is because version above increases register pressure which on i386
    * would push some of the variables to memory/stack.  Therefore keep this
    * version for i386 to avoid regressing performance.  */
   register int i = ctx->idx_i;
   register int j = ctx->idx_j;
   register byte *sbox = ctx->sbox;
   register int t;
 
   while ( length-- )
     {
       i++;
       i = i & 255; /* The and-op seems to be faster than the mod-op. */
       j += sbox[i];
       j &= 255;
       /* swap sbox[i] and sbox[j] */
       t = sbox[i]; sbox[i] = sbox[j]; sbox[j] = t;
       *outbuf++ = *inbuf++ ^ sbox[(sbox[i] + sbox[j]) & 255];
     }
 
   ctx->idx_i = i;
   ctx->idx_j = j;
 #endif
 }
 
 /* Entry point for the C implementation: run the stream transformation
  * on LENGTH bytes and burn 64 bytes of stack afterwards.  */
 static void
 encrypt_stream (void *context,
                 byte *outbuf, const byte *inbuf, size_t length)
 {
   do_encrypt_stream ((ARCFOUR_context *) context, outbuf, inbuf, length);
   _gcry_burn_stack (64);
 }
 
 #endif /*!USE_AMD64_ASM*/
 
 
 /* Initialize the cipher state in CONTEXT from the KEYLEN bytes at KEY.
  *
  * The first call runs the self-test; once it has failed, every call
  * returns GPG_ERR_SELFTEST_FAILED.  Keys shorter than 5 bytes (40 bits)
  * are rejected with GPG_ERR_INV_KEYLEN.  */
 static gcry_err_code_t
 do_arcfour_setkey (void *context, const byte *key, unsigned int keylen)
 {
   static int initialized;
   static const char* selftest_failed;
   ARCFOUR_context *ctx = (ARCFOUR_context *) context;
   byte karr[256];
   unsigned int n, k;
 
   if (!initialized)
     {
       /* The flag is set before running the test because selftest()
        * itself goes through arcfour_setkey() and thus re-enters here.  */
       initialized = 1;
       selftest_failed = selftest ();
       if (selftest_failed)
         log_error ("ARCFOUR selftest failed (%s)\n", selftest_failed );
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   if (keylen < 40/8) /* we want at least 40 bits */
     return GPG_ERR_INV_KEYLEN;
 
   ctx->idx_i = 0;
   ctx->idx_j = 0;
 
   /* Identity table, plus the key repeated cyclically to 256 bytes.  */
   for (n = 0; n < 256; n++)
     {
       ctx->sbox[n] = n;
       karr[n] = key[n % keylen];
     }
 
   /* Key-dependent shuffle of the table.  */
   k = 0;
   for (n = 0; n < 256; n++)
     {
       unsigned int tmp;
 
       k = (k + ctx->sbox[n] + karr[n]) & 255;
       tmp = ctx->sbox[n];
       ctx->sbox[n] = ctx->sbox[k];
       ctx->sbox[k] = tmp;
     }
 
   /* The expanded key copy must not stay on the stack.  */
   wipememory( karr, sizeof(karr) );
 
   return GPG_ERR_NO_ERROR;
 }
 
 /* Set-key entry point referenced by the cipher spec.  BULK_OPS is not
  * used; the work is done by do_arcfour_setkey.  */
 static gcry_err_code_t
 arcfour_setkey ( void *context, const byte *key, unsigned int keylen,
                  cipher_bulk_ops_t *bulk_ops )
 {
   (void)bulk_ops;
 
   return do_arcfour_setkey (context, key, keylen);
 }
 
 
 /* Encrypt a known test vector and decrypt the result again.
  * Returns NULL on success or a static string naming the failed step.  */
 static const char*
 selftest(void)
 {
   /* Test vector from Cryptlib labeled there: "from the
      State/Commerce Department". */
   static const byte key_1[] =
     { 0x61, 0x8A, 0x63, 0xD2, 0xFB };
   static const byte plaintext_1[] =
     { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };
   static const byte ciphertext_1[] =
     { 0xF1, 0x38, 0x29, 0xC9, 0xDE };
 
   ARCFOUR_context state;
   byte buf[16];
 
   /* Encrypt the plaintext and compare with the expected ciphertext.  */
   arcfour_setkey (&state, key_1, sizeof key_1, NULL);
   encrypt_stream (&state, buf, plaintext_1, sizeof plaintext_1);
   if (memcmp (buf, ciphertext_1, sizeof ciphertext_1) != 0)
     return "Arcfour encryption test 1 failed.";
 
   /* Re-key and run the same transformation in place to decrypt.  */
   arcfour_setkey (&state, key_1, sizeof key_1, NULL);
   encrypt_stream (&state, buf, buf, sizeof plaintext_1);
   if (memcmp (buf, plaintext_1, sizeof plaintext_1) != 0)
     return "Arcfour decryption test 1 failed.";
 
   return NULL;
 }
 
 
 /* Cipher specification record for ARCFOUR.  The same routine,
  * encrypt_stream, is registered twice at the end of the initializer.
  * NOTE(review): presumably these are the stream encrypt and decrypt
  * slots and 1 / 128 are block size and key length in bits -- confirm
  * against the gcry_cipher_spec_t declaration.  */
 gcry_cipher_spec_t _gcry_cipher_spec_arcfour =
   {
     GCRY_CIPHER_ARCFOUR, {0, 0},
     "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context),
     arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream,
   };
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index 1b11d718..5f431eff 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -1,1089 +1,1089 @@
 /* blowfish.c  -  Blowfish encryption
  *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * For a description of the algorithm, see:
  *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
  *   ISBN 0-471-11709-9. Pages 336 ff.
  */
 
 /* Test values:
  * key	  "abcdefghijklmnopqrstuvwxyz";
  * plain  "BLOWFISH"
  * cipher 32 4E D0 FE F4 13 A2 03
  *
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 
 #define BLOWFISH_BLOCKSIZE 8
 #define BLOWFISH_KEY_MIN_BITS 8
 #define BLOWFISH_KEY_MAX_BITS 576
 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 /* USE_ARM_ASM indicates whether to use ARM assembly code. */
 #undef USE_ARM_ASM
 #if defined(__ARMEL__)
 # if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
 #  define USE_ARM_ASM 1
 # endif
 #endif
 
 /* Per-key Blowfish state: four tables of 256 words and one array of
  * 16+2 words, the same shapes as the constant tables ks0..ks3 and ps
  * defined in this file.  */
 typedef struct {
     u32 s0[256];
     u32 s1[256];
     u32 s2[256];
     u32 s3[256];
     u32 p[16+2];
 } BLOWFISH_context;
 
 static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen,
                                   cipher_bulk_ops_t *bulk_ops);
 static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf);
 static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf);
 
 
 /* Precomputed S-boxes: four constant tables (ks0..ks3) of 256 32-bit words each. */
 static const u32 ks0[256] = {
     0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96,
     0xBA7C9045,0xF12C7F99,0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16,
     0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E,0x0D95748F,0x728EB658,
     0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013,
     0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E,
     0x6C9E0E8B,0xB01E8A3E,0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60,
     0xE65525F3,0xAA55AB94,0x57489862,0x63E81440,0x55CA396A,0x2AAB10B6,
     0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A,
     0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C,
     0x7A325381,0x28958677,0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193,
     0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032,0xEF845D5D,0xE98575B1,
     0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239,
     0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A,
     0x670C9C61,0xABD388F0,0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3,
     0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98,0xA1F1651D,0x39AF0176,
     0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE,
     0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706,
     0x1BFEDF72,0x429B023D,0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B,
     0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7,0xE3FE501A,0xB6794C3B,
     0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463,
     0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C,
     0xCC814544,0xAF5EBD09,0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3,
     0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB,0x5579C0BD,0x1A60320A,
     0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8,
     0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760,
     0x53317B48,0x3E00DF82,0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB,
     0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573,0x695B27B0,0xBBCA58C8,
     0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B,
     0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33,
     0x62FB1341,0xCEE4C6E8,0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4,
     0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0,0xD08ED1D0,0xAFC725E0,
     0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C,
     0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777,
     0xEA752DFE,0x8B021FA1,0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299,
     0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9,0x165FA266,0x80957705,
     0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF,
     0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E,
     0x226800BB,0x57B8E0AF,0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA,
     0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5,0x83260376,0x6295CFA9,
     0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915,
     0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F,
     0xF296EC6B,0x2A0DD915,0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664,
     0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A };
 
 static const u32 ks1[256] = {
     0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D,
     0x9CEE60B8,0x8FEDB266,0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1,
     0x193602A5,0x75094C29,0xA0591340,0xE4183A3E,0x3F54989A,0x5B429D65,
     0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1,
     0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9,
     0x3C971814,0x6B6A70A1,0x687F3584,0x52A0E286,0xB79C5305,0xAA500737,
     0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8,0xB03ADA37,0xF0500C0D,
     0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD,
     0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC,
     0xC8B57634,0x9AF3DDA7,0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41,
     0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331,0x4E548B38,0x4F6DB908,
     0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF,
     0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124,
     0x501ADDE6,0x9F84CD87,0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C,
     0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2,0xEF1C1847,0x3215D908,
     0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD,
     0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B,
     0x3C11183B,0x5924A509,0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E,
     0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3,0x771FE71C,0x4E3D06FA,
     0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A,
     0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D,
     0x1939260F,0x19C27960,0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66,
     0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28,0xC332DDEF,0xBE6C5AA5,
     0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84,
     0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96,
     0x0334FE1E,0xAA0363CF,0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14,
     0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E,0x648B1EAF,0x19BDF0CA,
     0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7,
     0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77,
     0x11ED935F,0x16681281,0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99,
     0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696,0xCDB30AEB,0x532E3054,
     0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73,
     0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA,
     0xDB6C4F15,0xFACB4FD0,0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105,
     0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250,0xCF62A1F2,0x5B8D2646,
     0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285,
     0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA,
     0x1DADF43E,0x233F7061,0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB,
     0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E,0xA6078084,0x19F8509E,
     0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC,
     0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD,
     0x675FDA79,0xE3674340,0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20,
     0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 };
 
 static const u32 ks2[256] = {
     0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7,
     0xBCF46B2E,0xD4A20068,0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF,
     0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840,0x4D95FC1D,0x96B591AF,
     0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504,
     0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4,
     0x0A2C86DA,0xE9B66DFB,0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE,
     0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6,0xAACE1E7C,0xD3375FEC,
     0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B,
     0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332,
     0x6841E7F7,0xCA7820FB,0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527,
     0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B,0x55A867BC,0xA1159A58,
     0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C,
     0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22,
     0x48C1133F,0xC70F86DC,0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17,
     0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564,0x257B7834,0x602A9C60,
     0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115,
     0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99,
     0xDE720C8C,0x2DA2F728,0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0,
     0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E,0x0A476341,0x992EFF74,
     0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D,
     0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3,
     0xB5390F92,0x690FED0B,0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3,
     0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB,0x37392EB3,0xCC115979,
     0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C,
     0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA,
     0x3D25BDD8,0xE2E1C3C9,0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A,
     0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE,0x9DBC8057,0xF0F7C086,
     0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC,
     0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24,
     0x55464299,0xBF582E61,0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2,
     0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9,0x7AEB2661,0x8B1DDF84,
     0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C,
     0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09,
     0x662D09A1,0xC4324633,0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10,
     0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169,0xDCB7DA83,0x573906FE,
     0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027,
     0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0,
     0x006058AA,0x30DC7D62,0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634,
     0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76,0x6F05E409,0x4B7C0188,
     0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC,
     0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8,
     0xA28514D9,0x6C51133C,0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837,
     0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 };
 
 /* Initial contents of the fourth S-box.  do_bf_setkey() copies this table
    into the context's s3 array before the key-dependent values are
    generated. */
 static const u32 ks3[256] = {
     0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742,
     0xD3822740,0x99BC9BBE,0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B,
     0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4,0x5748AB2F,0xBC946E79,
     0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6,
     0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A,
     0x63EF8CE2,0x9A86EE22,0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4,
     0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6,0x2826A2F9,0xA73A3AE1,
     0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59,
     0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797,
     0x2CF0B7D9,0x022B8B51,0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28,
     0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C,0xE029AC71,0xE019A5E6,
     0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28,
     0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA,
     0x03A16125,0x0564F0BD,0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A,
     0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319,0x7533D928,0xB155FDF5,
     0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F,
     0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE,
     0x5121CE64,0x774FBE32,0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680,
     0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166,0xB39A460A,0x6445C0DD,
     0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB,
     0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB,
     0x8D6612AE,0xBF3C6F47,0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370,
     0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D,0x4040CB08,0x4EB4E2CC,
     0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048,
     0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC,
     0xBB3A792B,0x344525BD,0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9,
     0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7,0x1A908749,0xD44FBD9A,
     0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F,
     0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A,
     0x0F91FC71,0x9B941525,0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1,
     0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442,0xE0EC6E0E,0x1698DB3B,
     0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E,
     0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E,
     0xE60B6F47,0x0FE3F11D,0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F,
     0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299,0xF523F357,0xA6327623,
     0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC,
     0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A,
     0x45E1D006,0xC3F27B9A,0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6,
     0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B,0x53113EC0,0x1640E3D3,
     0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060,
     0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C,
     0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F,
     0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 };
 
 /* Initial P-array: 16 per-round subkeys followed by the 2 words that are
    XORed in after the last round.  do_bf_setkey() copies it into the
    context and XORs the user key over it. */
 static const u32 ps[16+2] = {
     0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0,
     0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C,
     0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B };
 
 
 #ifdef USE_AMD64_ASM
 
 /* Assembly implementations of Blowfish. */
 extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
 					    u32 *ret_xr);
 
 extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out,
 					       const byte *in);
 
 extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out,
 					       const byte *in);
 
 /* These assembly implementations process four blocks in parallel. */
 extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *ctr);
 
 extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
 extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
 /* AMD64 build: thin wrapper that hands BC and the two half-block pointers
    straight to the assembly routine. */
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
   _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
 }
 
 /* AMD64 build: encrypt one block from INBUF to OUTBUF via the assembly
    routine. */
 static void
 do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
 }
 
 /* AMD64 build: decrypt one block from INBUF to OUTBUF via the assembly
    routine. */
 static void
 do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
 }
 
 /* Inline forwarder to the AMD64 CTR routine (per the declaration comment
    above, the assembly processes four blocks per call). */
 static inline void
 blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *ctr)
 {
   _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
 }
 
 /* Inline forwarder to the AMD64 CBC-decrypt routine (four blocks per
    call, per the declaration comment above). */
 static inline void
 blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *iv)
 {
   _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
 }
 
 /* Inline forwarder to the AMD64 CFB-decrypt routine (four blocks per
    call, per the declaration comment above). */
 static inline void
 blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *iv)
 {
   _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
 }
 
 /* Single-block encryption entry point (AMD64 build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (2*8);
 }
 
 /* Single-block decryption entry point (AMD64 build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (2*8);
 }
 
 #elif defined(USE_ARM_ASM)
 
 /* Assembly implementations of Blowfish. */
 extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
 					    u32 *ret_xr);
 
 extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out,
 					       const byte *in);
 
 extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out,
 					       const byte *in);
 
 /* These assembly implementations process two blocks in parallel. */
 extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *ctr);
 
 extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
 extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
 /* ARM build: thin wrapper that hands BC and the two half-block pointers
    straight to the assembly routine. */
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
   _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr);
 }
 
 /* ARM build: encrypt one block from INBUF to OUTBUF via the assembly
    routine. */
 static void
 do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf);
 }
 
 /* ARM build: decrypt one block from INBUF to OUTBUF via the assembly
    routine. */
 static void
 do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf);
 }
 
 /* Single-block encryption entry point (ARM build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (10*4);
 }
 
 /* Single-block decryption entry point (ARM build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (10*4);
 }
 
 #else /*USE_ARM_ASM*/
 
 
 /* Round helpers for the generic C implementation.  They expand in place
    and rely on locals named s0, s1, s2, s3 and p being in scope in the
    function that uses them.
      F(x)       - round function: ((s0[byte3] + s1[byte2]) ^ s2[byte1])
                   + s3[byte0], where byte3 is the most significant byte
                   of X.
      R(l,r,i)   - one round: XOR subkey p[i] into L, then XOR F(L) into R.
      R3(l,r,i)  - the same round applied to three independent blocks whose
                   variables carry the suffixes 0, 1 and 2. */
 #define F(x) ((( s0[(x)>>24] + s1[((x)>>16)&0xff])	 \
 		   ^ s2[((x)>>8)&0xff]) + s3[(x)&0xff] )
 #define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0)
 #define R3(l,r,i) do { R(l##0,r##0,i);R(l##1,r##1,i);R(l##2,r##2,i);} while(0)
 
 
 /* Generic C encryption of one 64-bit block held as two 32-bit halves.
    *RET_XL and *RET_XR are read as the left/right input halves and are
    overwritten with the result.  The locals s0..s3 and p must keep these
    exact names because the F/R macros reference them. */
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
   u32 xl, xr, *s0, *s1, *s2, *s3, *p;
 
   xl = *ret_xl;
   xr = *ret_xr;
   p = bc->p;
   s0 = bc->s0;
   s1 = bc->s1;
   s2 = bc->s2;
   s3 = bc->s3;
 
   /* 16 rounds; the roles of the two halves alternate each round instead
      of physically swapping them. */
   R( xl, xr,  0);
   R( xr, xl,  1);
   R( xl, xr,  2);
   R( xr, xl,  3);
   R( xl, xr,  4);
   R( xr, xl,  5);
   R( xl, xr,  6);
   R( xr, xl,  7);
   R( xl, xr,  8);
   R( xr, xl,  9);
   R( xl, xr, 10);
   R( xr, xl, 11);
   R( xl, xr, 12);
   R( xr, xl, 13);
   R( xl, xr, 14);
   R( xr, xl, 15);
 
   /* Final XOR with the last two subkeys. */
   xl ^= p[16];
   xr ^= p[16+1];
 
   /* The halves are stored crossed over. */
   *ret_xl = xr;
   *ret_xr = xl;
 }
 
 
 /* Generic C encryption of three consecutive 8-byte blocks (24 bytes) read
    big-endian from SRC and written big-endian to DST.  All inputs are
    loaded before any output is stored, so callers in this file pass the
    same buffer for both.  The locals s0..s3 and p must keep these exact
    names because the F/R/R3 macros reference them. */
 static void
 do_encrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
 {
   u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
 
   xl0 = buf_get_be32(src + 0);
   xr0 = buf_get_be32(src + 4);
   xl1 = buf_get_be32(src + 8);
   xr1 = buf_get_be32(src + 12);
   xl2 = buf_get_be32(src + 16);
   xr2 = buf_get_be32(src + 20);
   p = bc->p;
   s0 = bc->s0;
   s1 = bc->s1;
   s2 = bc->s2;
   s3 = bc->s3;
 
   /* 16 rounds, each applied to the three blocks side by side. */
   R3( xl, xr,  0);
   R3( xr, xl,  1);
   R3( xl, xr,  2);
   R3( xr, xl,  3);
   R3( xl, xr,  4);
   R3( xr, xl,  5);
   R3( xl, xr,  6);
   R3( xr, xl,  7);
   R3( xl, xr,  8);
   R3( xr, xl,  9);
   R3( xl, xr, 10);
   R3( xr, xl, 11);
   R3( xl, xr, 12);
   R3( xr, xl, 13);
   R3( xl, xr, 14);
   R3( xr, xl, 15);
 
   /* Final XOR with the last two subkeys. */
   xl0 ^= p[16];
   xr0 ^= p[16+1];
   xl1 ^= p[16];
   xr1 ^= p[16+1];
   xl2 ^= p[16];
   xr2 ^= p[16+1];
 
   /* Store each block with its halves crossed over. */
   buf_put_be32(dst + 0, xr0);
   buf_put_be32(dst + 4, xl0);
   buf_put_be32(dst + 8, xr1);
   buf_put_be32(dst + 12, xl1);
   buf_put_be32(dst + 16, xr2);
   buf_put_be32(dst + 20, xl2);
 }
 
 
 /* Generic C decryption of one 64-bit block held as two 32-bit halves.
    Same structure as do_encrypt() but the subkeys are applied in reverse
    order (p[17] down to p[0]).  *RET_XL and *RET_XR are read as input and
    overwritten with the result.  The locals s0..s3 and p must keep these
    exact names because the F/R macros reference them. */
 static void
 decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
   u32 xl, xr, *s0, *s1, *s2, *s3, *p;
 
   xl = *ret_xl;
   xr = *ret_xr;
   p = bc->p;
   s0 = bc->s0;
   s1 = bc->s1;
   s2 = bc->s2;
   s3 = bc->s3;
 
   /* 16 rounds using subkeys 17 down to 2. */
   R( xl, xr, 17);
   R( xr, xl, 16);
   R( xl, xr, 15);
   R( xr, xl, 14);
   R( xl, xr, 13);
   R( xr, xl, 12);
   R( xl, xr, 11);
   R( xr, xl, 10);
   R( xl, xr,  9);
   R( xr, xl,  8);
   R( xl, xr,  7);
   R( xr, xl,  6);
   R( xl, xr,  5);
   R( xr, xl,  4);
   R( xl, xr,  3);
   R( xr, xl,  2);
 
   /* Final XOR with the first two subkeys. */
   xl ^= p[1];
   xr ^= p[0];
 
   /* The halves are stored crossed over. */
   *ret_xl = xr;
   *ret_xr = xl;
 }
 
 
 /* Generic C decryption of three consecutive 8-byte blocks (24 bytes) read
    big-endian from SRC and written big-endian to DST.  Mirrors
    do_encrypt_3() with the subkeys applied in reverse order.  The locals
    s0..s3 and p must keep these exact names because the F/R/R3 macros
    reference them. */
 static void
 do_decrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
 {
   u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
 
   xl0 = buf_get_be32(src + 0);
   xr0 = buf_get_be32(src + 4);
   xl1 = buf_get_be32(src + 8);
   xr1 = buf_get_be32(src + 12);
   xl2 = buf_get_be32(src + 16);
   xr2 = buf_get_be32(src + 20);
   p = bc->p;
   s0 = bc->s0;
   s1 = bc->s1;
   s2 = bc->s2;
   s3 = bc->s3;
 
   /* 16 rounds using subkeys 17 down to 2, three blocks side by side. */
   R3( xl, xr, 17);
   R3( xr, xl, 16);
   R3( xl, xr, 15);
   R3( xr, xl, 14);
   R3( xl, xr, 13);
   R3( xr, xl, 12);
   R3( xl, xr, 11);
   R3( xr, xl, 10);
   R3( xl, xr,  9);
   R3( xr, xl,  8);
   R3( xl, xr,  7);
   R3( xr, xl,  6);
   R3( xl, xr,  5);
   R3( xr, xl,  4);
   R3( xl, xr,  3);
   R3( xr, xl,  2);
 
   /* Final XOR with the first two subkeys. */
   xl0 ^= p[1];
   xr0 ^= p[0];
   xl1 ^= p[1];
   xr1 ^= p[0];
   xl2 ^= p[1];
   xr2 ^= p[0];
 
   /* Store each block with its halves crossed over. */
   buf_put_be32(dst + 0, xr0);
   buf_put_be32(dst + 4, xl0);
   buf_put_be32(dst + 8, xr1);
   buf_put_be32(dst + 12, xl1);
   buf_put_be32(dst + 16, xr2);
   buf_put_be32(dst + 20, xl2);
 }
 
 #undef F
 #undef R
 #undef R3
 
 /* Encrypt the 8-byte block at INBUF into OUTBUF (generic C build).  The
    block is handled as two big-endian 32-bit halves; both are loaded
    before anything is stored. */
 static void
 do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf )
 {
   u32 left = buf_get_be32 (inbuf);
   u32 right = buf_get_be32 (inbuf + 4);
 
   do_encrypt (bc, &left, &right);
 
   buf_put_be32 (outbuf, left);
   buf_put_be32 (outbuf + 4, right);
 }
 
 /* Single-block encryption entry point (generic C build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 encrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (64);
 }
 
 
 /* Decrypt the 8-byte block at INBUF into OUTBUF (generic C build).  The
    block is handled as two big-endian 32-bit halves; both are loaded
    before anything is stored. */
 static void
 do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, const byte *inbuf)
 {
   u32 left = buf_get_be32 (inbuf);
   u32 right = buf_get_be32 (inbuf + 4);
 
   decrypt (bc, &left, &right);
 
   buf_put_be32 (outbuf, left);
   buf_put_be32 (outbuf + 4, right);
 }
 
 /* Single-block decryption entry point (generic C build).  CONTEXT is the
    opaque cipher context, used here as a BLOWFISH_context.  The return
    value is the number of stack bytes the caller should burn. */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((BLOWFISH_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (64);
 }
 
 #endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/
 
 
 /* CTR-mode bulk encryption of NBLOCKS complete blocks from INBUF_ARG to
    OUTBUF_ARG.  Meant only for the bulk encryption feature of cipher.c.
    CTR must be BLOWFISH_BLOCKSIZE bytes long and is advanced by the number
    of blocks processed.  Wide chunks go through the assembly routines when
    available, otherwise through the three-block C path; any remainder is
    handled one block at a time. */
 static void
 _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
                        const void *inbuf_arg, size_t nblocks)
 {
   BLOWFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
   int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
 
 #if defined(USE_AMD64_ASM)
   /* The 4-way assembly uses extra stack; account for it when it runs. */
   if (nblocks >= 4)
     burn_stack_depth += 5 * sizeof(void*);
 
   /* Four blocks per call. */
   for (; nblocks >= 4; nblocks -= 4)
     {
       blowfish_amd64_ctr_enc(ctx, dst, src, ctr);
       dst += 4 * BLOWFISH_BLOCKSIZE;
       src += 4 * BLOWFISH_BLOCKSIZE;
     }
 #elif defined(USE_ARM_ASM)
   /* Two blocks per call. */
   for (; nblocks >= 2; nblocks -= 2)
     {
       _gcry_blowfish_arm_ctr_enc(ctx, dst, src, ctr);
       dst += 2 * BLOWFISH_BLOCKSIZE;
       src += 2 * BLOWFISH_BLOCKSIZE;
     }
 #else
   /* Three blocks per iteration in plain C. */
   while (nblocks >= 3)
     {
       /* Build counter values CTR, CTR+1 and CTR+2, then step CTR by 3. */
       cipher_block_cpy (tmpbuf + 0, ctr, BLOWFISH_BLOCKSIZE);
       cipher_block_cpy (tmpbuf + 8, ctr, BLOWFISH_BLOCKSIZE);
       cipher_block_cpy (tmpbuf + 16, ctr, BLOWFISH_BLOCKSIZE);
       cipher_block_add (tmpbuf + 8, 1, BLOWFISH_BLOCKSIZE);
       cipher_block_add (tmpbuf + 16, 2, BLOWFISH_BLOCKSIZE);
       cipher_block_add (ctr, 3, BLOWFISH_BLOCKSIZE);
 
       /* Turn the counters into keystream and combine it with the input. */
       do_encrypt_3(ctx, tmpbuf, tmpbuf);
       buf_xor(dst, tmpbuf, src, BLOWFISH_BLOCKSIZE * 3);
 
       dst += BLOWFISH_BLOCKSIZE * 3;
       src += BLOWFISH_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left goes one block at a time. */
   while (nblocks)
     {
       do_encrypt_block(ctx, tmpbuf, ctr);
       cipher_block_xor(dst, tmpbuf, src, BLOWFISH_BLOCKSIZE);
       dst += BLOWFISH_BLOCKSIZE;
       src += BLOWFISH_BLOCKSIZE;
       cipher_block_add (ctr, 1, BLOWFISH_BLOCKSIZE);
       nblocks--;
     }
 
   /* Do not leave keystream behind on the stack. */
   wipememory(tmpbuf, sizeof(tmpbuf));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* CBC-mode bulk decryption of NBLOCKS complete blocks from INBUF_ARG to
    OUTBUF_ARG.  Meant only for the bulk encryption feature of cipher.c.
    IV is updated along the way.  The input may be the same buffer as the
    output, so decrypted data is staged in a local buffer first. */
 static void
 _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
                        const void *inbuf_arg, size_t nblocks)
 {
   BLOWFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char savebuf[BLOWFISH_BLOCKSIZE * 3];
   int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
 
 #if defined(USE_AMD64_ASM)
   /* The 4-way assembly uses extra stack; account for it when it runs. */
   if (nblocks >= 4)
     burn_stack_depth += 5 * sizeof(void*);
 
   /* Four blocks per call. */
   for (; nblocks >= 4; nblocks -= 4)
     {
       blowfish_amd64_cbc_dec(ctx, dst, src, iv);
       dst += 4 * BLOWFISH_BLOCKSIZE;
       src += 4 * BLOWFISH_BLOCKSIZE;
     }
 #elif defined(USE_ARM_ASM)
   /* Two blocks per call. */
   for (; nblocks >= 2; nblocks -= 2)
     {
       _gcry_blowfish_arm_cbc_dec(ctx, dst, src, iv);
       dst += 2 * BLOWFISH_BLOCKSIZE;
       src += 2 * BLOWFISH_BLOCKSIZE;
     }
 #else
   /* Three blocks per iteration in plain C. */
   while (nblocks >= 3)
     {
       /* Decrypt into SAVEBUF: the ciphertext is still needed below and
          may share storage with the output. */
       do_decrypt_3 (ctx, savebuf, src);
 
       /* First block is XORed with IV, the next two with the preceding
          ciphertext blocks; the third ciphertext block becomes the IV. */
       cipher_block_xor_1 (savebuf + 0, iv, BLOWFISH_BLOCKSIZE);
       cipher_block_xor_1 (savebuf + 8, src, BLOWFISH_BLOCKSIZE * 2);
       cipher_block_cpy (iv, src + 16, BLOWFISH_BLOCKSIZE);
       buf_cpy (dst, savebuf, BLOWFISH_BLOCKSIZE * 3);
 
       src += BLOWFISH_BLOCKSIZE * 3;
       dst += BLOWFISH_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left goes one block at a time. */
   while (nblocks)
     {
       /* Same staging as above, for a single block. */
       do_decrypt_block (ctx, savebuf, src);
 
       cipher_block_xor_n_copy_2(dst, savebuf, iv, src, BLOWFISH_BLOCKSIZE);
       src += BLOWFISH_BLOCKSIZE;
       dst += BLOWFISH_BLOCKSIZE;
       nblocks--;
     }
 
   /* Do not leave plaintext behind on the stack. */
   wipememory(savebuf, sizeof(savebuf));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* CFB-mode bulk decryption of NBLOCKS complete blocks from INBUF_ARG to
    OUTBUF_ARG.  Meant only for the bulk encryption feature of cipher.c.
    IV is updated along the way. */
 static void
 _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
                        const void *inbuf_arg, size_t nblocks)
 {
   BLOWFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
   int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
 
 #if defined(USE_AMD64_ASM)
   /* The 4-way assembly uses extra stack; account for it when it runs. */
   if (nblocks >= 4)
     burn_stack_depth += 5 * sizeof(void*);
 
   /* Four blocks per call. */
   for (; nblocks >= 4; nblocks -= 4)
     {
       blowfish_amd64_cfb_dec(ctx, dst, src, iv);
       dst += 4 * BLOWFISH_BLOCKSIZE;
       src += 4 * BLOWFISH_BLOCKSIZE;
     }
 #elif defined(USE_ARM_ASM)
   /* Two blocks per call. */
   for (; nblocks >= 2; nblocks -= 2)
     {
       _gcry_blowfish_arm_cfb_dec(ctx, dst, src, iv);
       dst += 2 * BLOWFISH_BLOCKSIZE;
       src += 2 * BLOWFISH_BLOCKSIZE;
     }
 #else
   /* Three blocks per iteration in plain C. */
   while (nblocks >= 3)
     {
       /* The cipher inputs are IV followed by the first two ciphertext
          blocks; the third ciphertext block becomes the next IV. */
       cipher_block_cpy (tmpbuf + 0, iv, BLOWFISH_BLOCKSIZE);
       cipher_block_cpy (tmpbuf + 8, src + 0, BLOWFISH_BLOCKSIZE * 2);
       cipher_block_cpy (iv, src + 16, BLOWFISH_BLOCKSIZE);
 
       do_encrypt_3 (ctx, tmpbuf, tmpbuf);
       buf_xor (dst, src, tmpbuf, BLOWFISH_BLOCKSIZE * 3);
 
       dst += BLOWFISH_BLOCKSIZE * 3;
       src += BLOWFISH_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left goes one block at a time. */
   while (nblocks)
     {
       do_encrypt_block(ctx, iv, iv);
       cipher_block_xor_n_copy(dst, iv, src, BLOWFISH_BLOCKSIZE);
       dst += BLOWFISH_BLOCKSIZE;
       src += BLOWFISH_BLOCKSIZE;
       nblocks--;
     }
 
   /* Do not leave keystream behind on the stack. */
   wipememory(tmpbuf, sizeof(tmpbuf));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Known-answer self-test.  Two key/plaintext pairs are encrypted and
    compared with the expected ciphertext, then decrypted in place and
    compared with the original plaintext.  Returns NULL on success or a
    static message naming the step that failed. */
 static const char*
 selftest(void)
 {
   static const byte key3[] =
     { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 };
   static const byte plain3[] =
     { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 };
   static const byte cipher3[] =
     { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 };
   byte plain[] = "BLOWFISH";
   byte buffer[8];
   cipher_bulk_ops_t bulk_ops;
   BLOWFISH_context c;
 
   /* Vector 1: 26-byte alphabetic key. */
   bf_setkey (&c, (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26,
              &bulk_ops);
   encrypt_block (&c, buffer, plain);
   if (memcmp (buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8) != 0)
     return "Blowfish selftest failed (1).";
   decrypt_block (&c, buffer, buffer);
   if (memcmp (buffer, plain, 8) != 0)
     return "Blowfish selftest failed (2).";
 
   /* Vector 2: 8-byte binary key. */
   bf_setkey (&c, key3, 8, &bulk_ops);
   encrypt_block (&c, buffer, plain3);
   if (memcmp (buffer, cipher3, 8) != 0)
     return "Blowfish selftest failed (3).";
   decrypt_block (&c, buffer, buffer);
   if (memcmp (buffer, plain3, 8) != 0)
     return "Blowfish selftest failed (4).";
 
   return NULL;
 }
 
 
 /* One entry of the small hash set that add_val() uses to spot repeated
    values during key setup. */
 struct hashset_elem {
   u32 val;      /* The stored value. */
   short nidx;   /* Index of the next chained entry in the overflow pool,
                    or -1 if this is the last entry of its chain. */
   char used;    /* Non-zero once this entry holds a value. */
 };
 
 /* Map VAL to a bucket index by XOR-folding its four bytes into one.  No
    stronger mixing is attempted; the original author notes that the
    Blowfish S-box values are already quite random. */
 static inline byte
 val_to_hidx(u32 val)
 {
   u32 folded = val;
 
   folded ^= val >> 8;
   folded ^= val >> 16;
   folded ^= val >> 24;
 
   /* Only the low byte is kept. */
   return (byte) folded;
 }
 
 /* Insert VAL into the 256-bucket hash set HSET and report duplicates.
 
    The first value of each bucket is stored in HSET itself.  Further
    values that map to the same bucket are chained through entries taken
    from MPOOL; *MIDX is the index of the next unused MPOOL entry and is
    advanced for every entry taken.
 
    Returns 1 if VAL is already in the set (nothing is stored in that
    case), or 0 if VAL was new and has been added.
 
    Every element of the chain is compared, including the last one.  The
    previous loop only compared elements that had a successor, so a
    duplicate of the chain's tail went unnoticed and was stored again. */
 static inline int
 add_val(struct hashset_elem hset[256], u32 val, int *midx,
         struct hashset_elem *mpool)
 {
   struct hashset_elem *elem;
   byte hidx;
 
   hidx = val_to_hidx(val);
   elem = &hset[hidx];
 
   /* Empty bucket: VAL becomes its first entry. */
   if (elem->used == 0)
     {
       elem->val = val;
       elem->nidx = -1;
       elem->used = 1;
       return 0;
     }
 
   /* Walk the chain.  Compare before testing for the end so that the
      tail element is checked too; on exit ELEM is the tail. */
   for (;;)
     {
       if (elem->val == val)
         return 1;
       if (elem->nidx < 0)
         break;
       elem = &mpool[elem->nidx];
     }
 
   /* Not found: link a fresh pool entry behind the tail. */
   elem->nidx = (*midx)++;
   elem = &mpool[elem->nidx];
 
   elem->val = val;
   elem->nidx = -1;
   elem->used = 1;
 
   return 0;
 }
 
 /* Run the Blowfish key schedule for KEY (KEYLEN bytes) into context C.
 
    On first use the self-test is run once; if it failed, this and every
    later call returns GPG_ERR_SELFTEST_FAILED.  Returns
    GPG_ERR_INV_KEYLEN when KEYLEN is outside the supported range,
    GPG_ERR_WEAK_KEY when a generated S-box is found to contain a repeated
    value (the context is fully set up in that case as well), and
    GPG_ERR_NO_ERROR otherwise. */
 static gcry_err_code_t
 do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen)
 {
   static int initialized;
   static const char *selftest_failed;
   struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case. */
   struct hashset_elem hset[4][256];     /* One hash set per S-box. */
   u32 *sbox[4];
   u32 data, datal, datar;
   int memidx = 0;
   int weak = 0;
   int i, j, t;
 
   if (!initialized)
     {
       /* Set the flag first: selftest() itself goes through key setup. */
       initialized = 1;
       selftest_failed = selftest();
       if (selftest_failed)
         log_error ("%s\n", selftest_failed );
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   if (keylen < BLOWFISH_KEY_MIN_BITS / 8
       || keylen > BLOWFISH_KEY_MAX_BITS / 8)
     return GPG_ERR_INV_KEYLEN;
 
   memset (hset, 0, sizeof(hset));
 
   /* Start from the fixed initial tables. */
   for (i = 0; i < 16+2; i++)
     c->p[i] = ps[i];
   for (i = 0; i < 256; i++)
     {
       c->s0[i] = ks0[i];
       c->s1[i] = ks1[i];
       c->s2[i] = ks2[i];
       c->s3[i] = ks3[i];
     }
 
   /* XOR the key, read as big-endian words and repeated cyclically, over
      the P-array. */
   j = 0;
   for (i = 0; i < 16+2; i++)
     {
       data = ((u32)key[j] << 24)
              | ((u32)key[(j+1)%keylen] << 16)
              | ((u32)key[(j+2)%keylen] << 8)
              | ((u32)key[(j+3)%keylen]);
       c->p[i] ^= data;
       j = (j+4) % keylen;
     }
 
   /* Replace the P-array pairwise with successive encryptions, starting
      from the all-zero block. */
   datal = datar = 0;
   for (i = 0; i < 16+2; i += 2)
     {
       do_encrypt (c, &datal, &datar);
       c->p[i]   = datal;
       c->p[i+1] = datar;
     }
 
   /* Continue the same chain through the four S-boxes in order, recording
      every generated word so that repeats within one S-box are noticed. */
   sbox[0] = c->s0;
   sbox[1] = c->s1;
   sbox[2] = c->s2;
   sbox[3] = c->s3;
   for (t = 0; t < 4; t++)
     {
       for (i = 0; i < 256; i += 2)
         {
           do_encrypt (c, &datal, &datar);
           sbox[t][i]   = datal;
           sbox[t][i+1] = datar;
 
           /* add_val() returns 1 for a value already seen in this S-box. */
           weak |= add_val (hset[t], datal, &memidx, mempool);
           weak |= add_val (hset[t], datar, &memidx, mempool);
         }
     }
 
   /* Clear stack. */
   wipememory(hset, sizeof(hset));
   wipememory(mempool, sizeof(mempool[0]) * memidx);
 
   _gcry_burn_stack (64);
 
   /* A key is reported as weak when one of the generated S-boxes contains
      the same value more than once. */
   if (weak)
     return GPG_ERR_WEAK_KEY;
 
   return GPG_ERR_NO_ERROR;
 }
 
 
 /* Set-key entry point used in the cipher spec.  Fills BULK_OPS with this
    module's CFB-decrypt, CBC-decrypt and CTR-encrypt bulk routines (all
    other slots zeroed) and runs the key schedule on CONTEXT.  The bulk
    routines are installed whatever the key schedule returns; its result
    code is passed back to the caller. */
 static gcry_err_code_t
 bf_setkey (void *context, const byte *key, unsigned keylen,
            cipher_bulk_ops_t *bulk_ops)
 {
   /* Setup bulk encryption routines.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
   bulk_ops->ctr_enc = _gcry_blowfish_ctr_enc;
   bulk_ops->cbc_dec = _gcry_blowfish_cbc_dec;
   bulk_ops->cfb_dec = _gcry_blowfish_cfb_dec;
 
   return do_bf_setkey ((BLOWFISH_context *) context, key, keylen);
 }
 
 
 /* Cipher specification record that exports Blowfish: algorithm id, the
    name "BLOWFISH", the block size, the context size, and the set-key /
    encrypt / decrypt callbacks defined above.
    NOTE(review): the value 128 is presumably the key length in bits and
    the two NULLs optional alias/OID tables -- confirm against the
    gcry_cipher_spec_t definition. */
 gcry_cipher_spec_t _gcry_cipher_spec_blowfish =
   {
     GCRY_CIPHER_BLOWFISH, {0, 0},
     "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128,
     sizeof (BLOWFISH_context),
     bf_setkey, encrypt_block, decrypt_block
   };
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 20bf7479..455b29e3 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -1,1185 +1,1185 @@
 /* cast5.c  -  CAST5 cipher (RFC2144)
  *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 /* Test vectors:
  *
  * 128-bit key	       = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A
  *	   plaintext   = 01 23 45 67 89 AB CD EF
  *	   ciphertext  = 23 8B 4F E5 84 7E 44 B2
  *
  * 80-bit  key	       = 01 23 45 67 12 34 56 78 23 45
  *		       = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00
  *	   plaintext   = 01 23 45 67 89 AB CD EF
  *	   ciphertext  = EB 6A 71 1A 2C 02 27 1B
  *
  * 40-bit  key	       = 01 23 45 67 12
  *		       = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00
  *	   plaintext   = 01 23 45 67 89 AB CD EF
  *	   ciphertext  = 7A C8 16 D1 6E 9B 30 2E
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "g10lib.h"
 #include "types.h"
 #include "cipher.h"
 #include "bithelp.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code.  It is
    enabled only on x86-64 builds where configure found a compatible
    assembler (either the AMD64 or the WIN64 platform check).  */
 #undef USE_AMD64_ASM
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 /* USE_ARM_ASM indicates whether to use ARM assembly code.  It is
    enabled only when __ARMEL__ is defined and configure found a
    compatible assembler.  */
 #undef USE_ARM_ASM
 #if defined(__ARMEL__)
 # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
 #  define USE_ARM_ASM 1
 # endif
 #endif
 
 /* Cipher block size in bytes: the C block routine below reads two
    32-bit words from its input and writes two to its output.  */
 #define CAST5_BLOCKSIZE 8
 
 /* Per-key state for CAST5.  The C block routine below consumes one Km
    word and one Kr byte per round, for 16 rounds.  */
 typedef struct {
     u32  Km[16];   /* Per-round key words, passed as M to the F1/F2/F3 macros. */
     byte Kr[16];   /* Per-round rotation amounts; only the low 5 bits are used. */
 #ifdef USE_ARM_ASM
     /* Extra 16-byte areas present only in ARM assembly builds.
        NOTE(review): presumably Kr re-encoded for the ARM encrypt and
        decrypt routines -- confirm against the key setup code.  */
     u32 Kr_arm_enc[16 / sizeof(u32)];
     u32 Kr_arm_dec[16 / sizeof(u32)];
 #endif
 } CAST5_context;
 
 /* Forward declarations of the cipher entry points (key setup and
    single-block encrypt/decrypt); their definitions follow later in
    this file.  */
 static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen,
                                     cipher_bulk_ops_t *bulk_ops);
 static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf);
 static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf);
 
 
 
 /* Short names for the four 256-entry rows of the non-static table
    _gcry_cast5_s1to4 defined right below; s1..s4 are the S-boxes
    indexed by the F1/F2/F3 round macros.  */
 #define s1 _gcry_cast5_s1to4[0]
 #define s2 _gcry_cast5_s1to4[1]
 #define s3 _gcry_cast5_s1to4[2]
 #define s4 _gcry_cast5_s1to4[3]
 
 const u32 _gcry_cast5_s1to4[4][256] = { {
 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949,
 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
 0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0,
 0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7,
 0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935,
 0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d,
 0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50,
 0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe,
 0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3,
 0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167,
 0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291,
 0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779,
 0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2,
 0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511,
 0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d,
 0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5,
 0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324,
 0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c,
 0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc,
 0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d,
 0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96,
 0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a,
 0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d,
 0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd,
 0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6,
 0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9,
 0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872,
 0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c,
 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e,
 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9,
 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
 }, {
 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651,
 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb,
 0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806,
 0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b,
 0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359,
 0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b,
 0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c,
 0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34,
 0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb,
 0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd,
 0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860,
 0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b,
 0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304,
 0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b,
 0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf,
 0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c,
 0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13,
 0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f,
 0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6,
 0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6,
 0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58,
 0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906,
 0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d,
 0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6,
 0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4,
 0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6,
 0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f,
 0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249,
 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9,
 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1
 }, {
 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90,
 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5,
 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e,
 0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240,
 0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5,
 0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b,
 0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71,
 0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04,
 0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82,
 0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15,
 0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2,
 0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176,
 0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148,
 0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc,
 0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341,
 0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e,
 0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51,
 0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f,
 0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a,
 0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b,
 0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b,
 0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5,
 0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45,
 0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536,
 0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc,
 0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0,
 0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69,
 0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2,
 0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49,
 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d,
 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a,
 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783
 }, {
 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1,
 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf,
 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15,
 0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121,
 0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25,
 0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5,
 0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb,
 0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5,
 0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d,
 0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6,
 0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23,
 0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003,
 0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6,
 0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119,
 0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24,
 0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a,
 0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79,
 0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df,
 0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26,
 0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab,
 0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7,
 0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417,
 0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2,
 0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2,
 0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a,
 0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919,
 0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef,
 0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876,
 0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab,
 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04,
 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282,
 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
 } };
 static const u32 s5[256] = {
 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f,
 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a,
 0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff,
 0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02,
 0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a,
 0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7,
 0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9,
 0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981,
 0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774,
 0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655,
 0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2,
 0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910,
 0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1,
 0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da,
 0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049,
 0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f,
 0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba,
 0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be,
 0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3,
 0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840,
 0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4,
 0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2,
 0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7,
 0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5,
 0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e,
 0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e,
 0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801,
 0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad,
 0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0,
 0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20,
 0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8,
 0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4
 };
 static const u32 s6[256] = {
 0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac,
 0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138,
 0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367,
 0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98,
 0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072,
 0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3,
 0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd,
 0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8,
 0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9,
 0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54,
 0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387,
 0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc,
 0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf,
 0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf,
 0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f,
 0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289,
 0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950,
 0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f,
 0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b,
 0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be,
 0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13,
 0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976,
 0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0,
 0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891,
 0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da,
 0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc,
 0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084,
 0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25,
 0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121,
 0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5,
 0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd,
 0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f
 };
 static const u32 s7[256] = {
 0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f,
 0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de,
 0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43,
 0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19,
 0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2,
 0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516,
 0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88,
 0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816,
 0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756,
 0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a,
 0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264,
 0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688,
 0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28,
 0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3,
 0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7,
 0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06,
 0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033,
 0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a,
 0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566,
 0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509,
 0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962,
 0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e,
 0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c,
 0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c,
 0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285,
 0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301,
 0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be,
 0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767,
 0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647,
 0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914,
 0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c,
 0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3
 };
 static const u32 s8[256] = {
 0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5,
 0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc,
 0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd,
 0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d,
 0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2,
 0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862,
 0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc,
 0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c,
 0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e,
 0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039,
 0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8,
 0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42,
 0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5,
 0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472,
 0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225,
 0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c,
 0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb,
 0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054,
 0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70,
 0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc,
 0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c,
 0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3,
 0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4,
 0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101,
 0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f,
 0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e,
 0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a,
 0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c,
 0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384,
 0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c,
 0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82,
 0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e
 };
 
 
 #ifdef USE_AMD64_ASM
 
 /* Assembly implementations of CAST5.  Only the prototypes appear here;
    NOTE(review): the routines are presumably provided by a separate
    AMD64 assembly source -- confirm in the build files.  */
 extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf,
 					    const byte *inbuf);
 
 extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf,
 					    const byte *inbuf);
 
 /* These assembly implementations process four blocks in parallel. */
 extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *ctr);
 
 extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
 extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
 /* AMD64 build: encrypt a block by forwarding CONTEXT, OUTBUF and INBUF
    unchanged to the assembly routine.  */
 static void
 do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
 }
 
 /* AMD64 build: decrypt a block by forwarding CONTEXT, OUTBUF and INBUF
    unchanged to the assembly routine.  */
 static void
 do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
 }
 
 /* Pass-through to the AMD64 CTR-mode assembly routine; CTX, OUT, IN
    and CTR are forwarded unchanged.  */
 static void
 cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
 {
   _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
 }
 
 /* Pass-through to the AMD64 CBC-decryption assembly routine; CTX, OUT,
    IN and IV are forwarded unchanged.  */
 static void
 cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
 {
   _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
 }
 
 /* Pass-through to the AMD64 CFB-decryption assembly routine; CTX, OUT,
    IN and IV are forwarded unchanged.  */
 static void
 cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
 {
   _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
 }
 
 /* Generic-signature encryption entry point (AMD64 build).  Treats
    CONTEXT as a CAST5_context, encrypts INBUF into OUTBUF, and returns
    the burn-stack byte count for this code path.  */
 static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (2*8);
 }
 
 /* Generic-signature decryption entry point (AMD64 build).  Treats
    CONTEXT as a CAST5_context, decrypts INBUF into OUTBUF, and returns
    the burn-stack byte count for this code path.  */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (2*8);
 }
 
 #elif defined(USE_ARM_ASM)
 
 /* ARM assembly implementations of CAST5.  Only the prototypes appear
    here; NOTE(review): the routines are presumably provided by a
    separate ARM assembly source -- confirm in the build files.  */
 extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf,
 					    const byte *inbuf);
 
 extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf,
 					    const byte *inbuf);
 
 /* These assembly implementations process two blocks in parallel. */
 extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *ctr);
 
 extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
 extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
 /* ARM build: encrypt a block by forwarding CONTEXT, OUTBUF and INBUF
    unchanged to the assembly routine.  */
 static void
 do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf);
 }
 
 /* ARM build: decrypt a block by forwarding CONTEXT, OUTBUF and INBUF
    unchanged to the assembly routine.  */
 static void
 do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
   _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf);
 }
 
 /* Generic-signature encryption entry point (ARM build).  Treats
    CONTEXT as a CAST5_context, encrypts INBUF into OUTBUF, and returns
    the burn-stack byte count for this code path.  */
 static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (10*4);
 }
 
 /* Generic-signature decryption entry point (ARM build).  Treats
    CONTEXT as a CAST5_context, decrypts INBUF into OUTBUF, and returns
    the burn-stack byte count for this code path.  */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (10*4);
 }
 
 #else /*USE_ARM_ASM*/
 
 /* Round functions for the C implementation.  Each macro needs a u32
    variable named I in the enclosing scope, which it overwrites:
    I is first set to M combined with D (F1: M + D, F2: M ^ D,
    F3: M - D), then rotated left by R via rol().  The four bytes of I,
    most significant first, index s1, s2, s3 and s4, and the lookups are
    combined with a different operator order per macro:
      F1: ((s1 ^ s2) - s3) + s4
      F2: ((s1 - s2) + s3) ^ s4
      F3: ((s1 + s2) ^ s3) - s4
    The value of the macro is that combined word.  */
 #define F1(D,m,r)  (  (I = ((m) + (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) )
 #define F2(D,m,r)  (  (I = ((m) ^ (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) )
 #define F3(D,m,r)  (  (I = ((m) - (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) )
 
 /* Encrypt one 8-byte block from INBUF into OUTBUF with the key
  * schedule held in C (portable C implementation).
  *
  * The block is a 16-round Feistel network.  Rather than swapping the
  * halves through a temporary after every round, the rounds alternate
  * which half is updated: odd-numbered rounds fold F(right) into left,
  * even-numbered rounds fold F(left) into right.  After an even number
  * of rounds both formulations hold the same (left, right) pair.
  *
  * The round function cycles F1, F2, F3, F1, ... across rounds 1..16.
  * Rotation amounts are fetched four at a time as one little-endian
  * word from c->Kr and consumed one byte (masked to 5 bits) per round.
  */
 static void
 do_encrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf)
 {
     const u32 *subkey = c->Km;
     u32 left, right;
     u32 rot;
     u32 I;   /* scratch word written by the F1/F2/F3 macros */
 
     /* Load the plaintext as two big-endian 32-bit halves.  */
     left  = buf_get_be32(inbuf + 0);
     right = buf_get_be32(inbuf + 4);
 
     /* Rounds 1-4.  */
     rot = buf_get_le32(c->Kr + 0);
     left  ^= F1(right, subkey[ 0], rot & 31);  rot >>= 8;
     right ^= F2(left,  subkey[ 1], rot & 31);  rot >>= 8;
     left  ^= F3(right, subkey[ 2], rot & 31);  rot >>= 8;
     right ^= F1(left,  subkey[ 3], rot & 31);
 
     /* Rounds 5-8.  */
     rot = buf_get_le32(c->Kr + 4);
     left  ^= F2(right, subkey[ 4], rot & 31);  rot >>= 8;
     right ^= F3(left,  subkey[ 5], rot & 31);  rot >>= 8;
     left  ^= F1(right, subkey[ 6], rot & 31);  rot >>= 8;
     right ^= F2(left,  subkey[ 7], rot & 31);
 
     /* Rounds 9-12.  */
     rot = buf_get_le32(c->Kr + 8);
     left  ^= F3(right, subkey[ 8], rot & 31);  rot >>= 8;
     right ^= F1(left,  subkey[ 9], rot & 31);  rot >>= 8;
     left  ^= F2(right, subkey[10], rot & 31);  rot >>= 8;
     right ^= F3(left,  subkey[11], rot & 31);
 
     /* Rounds 13-16.  */
     rot = buf_get_le32(c->Kr + 12);
     left  ^= F1(right, subkey[12], rot & 31);  rot >>= 8;
     right ^= F2(left,  subkey[13], rot & 31);  rot >>= 8;
     left  ^= F3(right, subkey[14], rot & 31);  rot >>= 8;
     right ^= F1(left,  subkey[15], rot & 31);
 
     /* The ciphertext is the two halves in exchanged order.  */
     buf_put_be32(outbuf + 0, right);
     buf_put_be32(outbuf + 4, left);
 }
 
 /* Cipher-spec encrypt hook for the generic C implementation.  CONTEXT is an
    untyped pointer to a CAST5_context.  Encrypts one block and returns the
    stack-burn depth in bytes. */
 static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   do_encrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (20+4*sizeof(void*));
 }
 
 
 /* Encrypt three blocks at once.  INBUF supplies 24 bytes (six big-endian
  * 32-bit words) and OUTBUF receives 24 bytes.  The rounds are the same as in
  * do_encrypt_block, but each round is applied to the three independent
  * (l,r) pairs in turn before moving on to the next round.
  *
  * All six input words are read before any output word is written, and the
  * bulk routines in this file call this with INBUF == OUTBUF. */
 static void
 do_encrypt_block_3( CAST5_context *c, byte *outbuf, const byte *inbuf )
 {
     u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
     u32 I;   /* used by the Fx macros */
     u32 *Km;
     u32 Kr;
 
     Km = c->Km;
     /* Four c->Kr entries per load; Kr & 31 selects the current one and
      * Kr >>= 8 moves on to the next. */
     Kr = buf_get_le32(c->Kr + 0);
 
     /* Block 0 is (l0,r0), block 1 is (l1,r1), block 2 is (l2,r2). */
     l0 = buf_get_be32(inbuf + 0);
     r0 = buf_get_be32(inbuf + 4);
     l1 = buf_get_be32(inbuf + 8);
     r1 = buf_get_be32(inbuf + 12);
     l2 = buf_get_be32(inbuf + 16);
     r2 = buf_get_be32(inbuf + 20);
 
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
     Kr = buf_get_le32(c->Kr + 4);
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
     Kr = buf_get_le32(c->Kr + 8);
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
     Kr = buf_get_le32(c->Kr + 12);
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
 
     /* As in the single-block routine, each block is stored with its
      * halves exchanged (r before l). */
     buf_put_be32(outbuf + 0, r0);
     buf_put_be32(outbuf + 4, l0);
     buf_put_be32(outbuf + 8, r1);
     buf_put_be32(outbuf + 12, l1);
     buf_put_be32(outbuf + 16, r2);
     buf_put_be32(outbuf + 20, l2);
 }
 
 
 /* Decrypt one block: the same round structure as do_encrypt_block, but the
  * key words are consumed in reverse order (Km[15] down to Km[0]).  Both
  * input words are read before anything is written to OUTBUF. */
 static void
 do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf )
 {
     u32 l, r, t;
     u32 I;   /* used by the Fx macros */
     u32 *Km;
     u32 Kr;
 
     Km = c->Km;
     /* The c->Kr entries are loaded four at a time starting from the last
      * group, with the big-endian loader, so that Kr & 31 followed by
      * Kr >>= 8 walks the entries from index 15 downwards. */
     Kr = buf_get_be32(c->Kr + 12);
 
     l = buf_get_be32(inbuf + 0);
     r = buf_get_be32(inbuf + 4);
 
     t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr = buf_get_be32(c->Kr + 8);
     t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr = buf_get_be32(c->Kr + 4);
     t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr = buf_get_be32(c->Kr + 0);
     t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
     t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31);
 
     /* Store the halves exchanged (r before l). */
     buf_put_be32(outbuf + 0, r);
     buf_put_be32(outbuf + 4, l);
 }
 
 /* Cipher-spec decrypt hook for the generic C implementation.  CONTEXT is an
    untyped pointer to a CAST5_context.  Decrypts one block and returns the
    stack-burn depth in bytes. */
 static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt_block ((CAST5_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (20+4*sizeof(void*));
 }
 
 
 /* Decrypt three blocks at once.  INBUF supplies 24 bytes (six big-endian
  * 32-bit words) and OUTBUF receives 24 bytes.  The rounds are the same as in
  * do_decrypt_block, but each round is applied to the three independent
  * (l,r) pairs in turn before moving on to the next round.
  *
  * All six input words are read before any output word is written. */
 static void
 do_decrypt_block_3 (CAST5_context *c, byte *outbuf, const byte *inbuf )
 {
     u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
     u32 I;   /* used by the Fx macros */
     u32 *Km;
     u32 Kr;
 
     Km = c->Km;
     /* Start from the last group of four c->Kr entries; Kr & 31 followed by
      * Kr >>= 8 then walks the entries from index 15 downwards. */
     Kr = buf_get_be32(c->Kr + 12);
 
     /* Block 0 is (l0,r0), block 1 is (l1,r1), block 2 is (l2,r2). */
     l0 = buf_get_be32(inbuf + 0);
     r0 = buf_get_be32(inbuf + 4);
     l1 = buf_get_be32(inbuf + 8);
     r1 = buf_get_be32(inbuf + 12);
     l2 = buf_get_be32(inbuf + 16);
     r2 = buf_get_be32(inbuf + 20);
 
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
     Kr = buf_get_be32(c->Kr + 8);
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
     Kr = buf_get_be32(c->Kr + 4);
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
     Kr = buf_get_be32(c->Kr + 0);
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
     Kr >>= 8;
     t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
 	    t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
 		    t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
 
     /* Each block is stored with its halves exchanged (r before l). */
     buf_put_be32(outbuf + 0, r0);
     buf_put_be32(outbuf + 4, l0);
     buf_put_be32(outbuf + 8, r1);
     buf_put_be32(outbuf + 12, l1);
     buf_put_be32(outbuf + 16, r2);
     buf_put_be32(outbuf + 20, l2);
 }
 
 #endif /*!USE_ARM_ASM*/
 
 
 /* CTR-mode bulk encryption of NBLOCKS complete blocks from INBUF_ARG into
    OUTBUF_ARG.  This function is only intended for the bulk encryption
    feature of cipher.c.  CTR is expected to be of size CAST5_BLOCKSIZE; the
    C code paths below advance it by one for every block they process. */
 static void
 _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
 		    const void *inbuf_arg, size_t nblocks)
 {
   CAST5_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char keystream[CAST5_BLOCKSIZE * 3];
   int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
 
 #ifdef USE_AMD64_ASM
   /* The assembly routine is more stack hungry than the C code. */
   if (nblocks >= 4)
     burn_stack_depth += 8 * sizeof(void*);
 
   /* Hand groups of four blocks to the assembly routine. */
   while (nblocks >= 4)
     {
       cast5_amd64_ctr_enc(ctx, dst, src, ctr);
 
       dst += 4 * CAST5_BLOCKSIZE;
       src += 4 * CAST5_BLOCKSIZE;
       nblocks -= 4;
     }
 #elif defined(USE_ARM_ASM)
   /* Hand pairs of blocks to the assembly routine. */
   while (nblocks >= 2)
     {
       _gcry_cast5_arm_ctr_enc(ctx, dst, src, ctr);
 
       dst += 2 * CAST5_BLOCKSIZE;
       src += 2 * CAST5_BLOCKSIZE;
       nblocks -= 2;
     }
 #else
   /* No assembly available: work on three blocks per iteration. */
   while (nblocks >= 3)
     {
       /* Build counter, counter + 1 and counter + 2 side by side, and move
          the caller's counter on by three. */
       cipher_block_cpy (keystream + 0, ctr, CAST5_BLOCKSIZE);
       cipher_block_cpy (keystream + 8, ctr, CAST5_BLOCKSIZE);
       cipher_block_cpy (keystream + 16, ctr, CAST5_BLOCKSIZE);
       cipher_block_add (keystream + 8, 1, CAST5_BLOCKSIZE);
       cipher_block_add (keystream + 16, 2, CAST5_BLOCKSIZE);
       cipher_block_add (ctr, 3, CAST5_BLOCKSIZE);
       /* Turn the three counter blocks into key stream in place. */
       do_encrypt_block_3(ctx, keystream, keystream);
       /* Output = input XOR key stream. */
       buf_xor(dst, keystream, src, CAST5_BLOCKSIZE * 3);
       dst += CAST5_BLOCKSIZE * 3;
       src += CAST5_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left over is handled one block at a time. */
   while (nblocks)
     {
       /* Encrypt the counter. */
       do_encrypt_block(ctx, keystream, ctr);
       /* Output = input XOR encrypted counter. */
       cipher_block_xor(dst, keystream, src, CAST5_BLOCKSIZE);
       dst += CAST5_BLOCKSIZE;
       src += CAST5_BLOCKSIZE;
       /* Step the counter. */
       cipher_block_add (ctr, 1, CAST5_BLOCKSIZE);
       nblocks--;
     }
 
   wipememory(keystream, sizeof(keystream));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* CBC-mode bulk decryption of NBLOCKS complete blocks from INBUF_ARG into
    OUTBUF_ARG, chaining through IV.  This function is only intended for the
    bulk encryption feature of cipher.c. */
 static void
 _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
 		    const void *inbuf_arg, size_t nblocks)
 {
   CAST5_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char scratch[CAST5_BLOCKSIZE * 3];
   int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
 
 #ifdef USE_AMD64_ASM
   /* The assembly routine is more stack hungry than the C code. */
   if (nblocks >= 4)
     burn_stack_depth += 8 * sizeof(void*);
 
   /* Hand groups of four blocks to the assembly routine. */
   while (nblocks >= 4)
     {
       cast5_amd64_cbc_dec(ctx, dst, src, iv);
 
       dst += 4 * CAST5_BLOCKSIZE;
       src += 4 * CAST5_BLOCKSIZE;
       nblocks -= 4;
     }
 #elif defined(USE_ARM_ASM)
   /* Hand pairs of blocks to the assembly routine. */
   while (nblocks >= 2)
     {
       _gcry_cast5_arm_cbc_dec(ctx, dst, src, iv);
 
       dst += 2 * CAST5_BLOCKSIZE;
       src += 2 * CAST5_BLOCKSIZE;
       nblocks -= 2;
     }
 #else
   /* No assembly available: work on three blocks per iteration. */
   while (nblocks >= 3)
     {
       /* The input is still needed after decryption and may be the very
          same buffer as the output, so decrypt into SCRATCH first.  */
       do_decrypt_block_3 (ctx, scratch, src);
 
       /* First block chains with IV, the other two with the ciphertext
          block that precedes them. */
       cipher_block_xor_1 (scratch + 0, iv, CAST5_BLOCKSIZE);
       cipher_block_xor_1 (scratch + 8, src, CAST5_BLOCKSIZE * 2);
       /* The last ciphertext block becomes the next IV; save it before
          the output is written. */
       cipher_block_cpy (iv, src + 16, CAST5_BLOCKSIZE);
       buf_cpy (dst, scratch, CAST5_BLOCKSIZE * 3);
       src += CAST5_BLOCKSIZE * 3;
       dst += CAST5_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left over is handled one block at a time. */
   while (nblocks)
     {
       /* The input is still needed after decryption and may be the very
          same buffer as the output, so decrypt into SCRATCH first.  */
       do_decrypt_block (ctx, scratch, src);
 
       cipher_block_xor_n_copy_2(dst, scratch, iv, src, CAST5_BLOCKSIZE);
       src += CAST5_BLOCKSIZE;
       dst += CAST5_BLOCKSIZE;
       nblocks--;
     }
 
   wipememory(scratch, sizeof(scratch));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 /* CFB-mode bulk decryption of NBLOCKS complete blocks from INBUF_ARG into
    OUTBUF_ARG, chaining through IV.  This function is only intended for the
    bulk encryption feature of cipher.c. */
 static void
 _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
 		    const void *inbuf_arg, size_t nblocks)
 {
   CAST5_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char keystream[CAST5_BLOCKSIZE * 3];
   int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
 
 #ifdef USE_AMD64_ASM
   /* The assembly routine is more stack hungry than the C code. */
   if (nblocks >= 4)
     burn_stack_depth += 8 * sizeof(void*);
 
   /* Hand groups of four blocks to the assembly routine. */
   while (nblocks >= 4)
     {
       cast5_amd64_cfb_dec(ctx, dst, src, iv);
 
       dst += 4 * CAST5_BLOCKSIZE;
       src += 4 * CAST5_BLOCKSIZE;
       nblocks -= 4;
     }
 #elif defined(USE_ARM_ASM)
   /* Hand pairs of blocks to the assembly routine. */
   while (nblocks >= 2)
     {
       _gcry_cast5_arm_cfb_dec(ctx, dst, src, iv);
 
       dst += 2 * CAST5_BLOCKSIZE;
       src += 2 * CAST5_BLOCKSIZE;
       nblocks -= 2;
     }
 #else
   /* No assembly available: work on three blocks per iteration. */
   while (nblocks >= 3)
     {
       /* The cipher input is IV followed by the first two ciphertext
          blocks; the third ciphertext block becomes the next IV. */
       cipher_block_cpy (keystream + 0, iv, CAST5_BLOCKSIZE);
       cipher_block_cpy (keystream + 8, src + 0, CAST5_BLOCKSIZE * 2);
       cipher_block_cpy (iv, src + 16, CAST5_BLOCKSIZE);
       do_encrypt_block_3 (ctx, keystream, keystream);
       buf_xor (dst, src, keystream, CAST5_BLOCKSIZE * 3);
       dst += CAST5_BLOCKSIZE * 3;
       src += CAST5_BLOCKSIZE * 3;
       nblocks -= 3;
     }
 #endif
 
   /* Whatever is left over is handled one block at a time, encrypting IV
      in place. */
   while (nblocks)
     {
       do_encrypt_block(ctx, iv, iv);
       cipher_block_xor_n_copy(dst, iv, src, CAST5_BLOCKSIZE);
       dst += CAST5_BLOCKSIZE;
       src += CAST5_BLOCKSIZE;
       nblocks--;
     }
 
   wipememory(keystream, sizeof(keystream));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Known-answer test: set a fixed 128-bit key, encrypt a fixed block and
    compare with the expected ciphertext, then decrypt in place and compare
    with the plaintext.  Returns NULL when everything matches, otherwise a
    short string naming the step that failed. */
 static const char*
 selftest(void)
 {
     static const byte key[16] =
                     { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
 		      0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A  };
     static const byte plain[8] =
                     { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
     static const byte cipher[8] =
                     { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
     CAST5_context c;
     cipher_bulk_ops_t bulk_ops;
     byte scratch[8];
 
     cast_setkey( &c, key, 16, &bulk_ops );
 
     /* Step 1: encryption must reproduce the reference ciphertext. */
     encrypt_block( &c, scratch, plain );
     if( memcmp( scratch, cipher, 8 ) != 0 )
 	return "1";
 
     /* Step 2: decrypting in place must give back the plaintext. */
     decrypt_block( &c, scratch, scratch );
     if( memcmp( scratch, plain, 8 ) != 0 )
 	return "2";
 
 #if 0 /* full maintenance test */
     {
 	int i;
 	byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
 			0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
 	byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78,
 			0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A };
 	byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6,
 			0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 };
 	byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71,
 			0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E };
 
 	for(i=0; i < 1000000; i++ ) {
 	    cast_setkey( &c, b0, 16, &bulk_ops );
 	    encrypt_block( &c, a0, a0 );
 	    encrypt_block( &c, a0+8, a0+8 );
 	    cast_setkey( &c, a0, 16, &bulk_ops );
 	    encrypt_block( &c, b0, b0 );
 	    encrypt_block( &c, b0+8, b0+8 );
 	}
 	if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) )
 	    return "3";
 
     }
 #endif
 
     return NULL;
 }
 
 
 /* Derive 16 key words into K[0..15] from the four state words in X.
  *
  * X and Z are both read and overwritten: Z is computed from X, then X from
  * Z, and so on, with four K words produced after each step.  Because X is
  * left in its updated state, the caller in this file invokes the function
  * twice in a row on the same X and Z and gets a different K each time.
  *
  * xi(i) and zi(i) pick byte number i (0..15) out of the four-word arrays,
  * where byte 0 is the most significant byte of word 0.  Those bytes index
  * the tables s5..s8. */
 static void
 key_schedule( u32 *x, u32 *z, u32 *k )
 {
 
 #define xi(i)   ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff)
 #define zi(i)   ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff)
 
     /* Step 1: Z from X, then K[0..3] from Z. */
     z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
     z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
     z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
     z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
     k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)];
     k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)];
     k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)];
     k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)];
 
     /* Step 2: X from Z, then K[4..7] from X. */
     x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
     x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
     x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
     x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
     k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)];
     k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)];
     k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)];
     k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)];
 
     /* Step 3: Z from X again, then K[8..11] from Z. */
     z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)];
     z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)];
     z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)];
     z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)];
     k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)];
     k[9] = s5[zi( 1)]^s6[zi( 0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)];
     k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)];
     k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)];
 
     /* Step 4: X from Z again, then K[12..15] from X. */
     x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)];
     x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)];
     x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)];
     x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)];
     k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)];
     k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)];
     k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)];
     k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)];
 
 #undef xi
 #undef zi
 }
 
 
 /* Expand the KEYLEN-byte KEY into the round keys stored in C.
  *
  * The first call in the process also runs selftest() once and remembers the
  * outcome; if that test failed, this and every later call returns
  * GPG_ERR_SELFTEST_FAILED.  Any KEYLEN other than 16 is rejected with
  * GPG_ERR_INV_KEYLEN.  On success C->Km and C->Kr (and, with USE_ARM_ASM,
  * C->Kr_arm_enc / C->Kr_arm_dec) are filled in, the temporaries are wiped
  * and GPG_ERR_NO_ERROR is returned. */
 static gcry_err_code_t
 do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen )
 {
   static int initialized;
   static const char* selftest_failed;
   u32 state_x[4];
   u32 state_z[4];
   u32 subkeys[16];
   int i;
 
   if( !initialized )
     {
       /* Set the flag first: selftest() itself ends up in this function. */
       initialized = 1;
       selftest_failed = selftest();
       if( selftest_failed )
         log_error ("CAST5 selftest failed (%s).\n", selftest_failed );
     }
   if( selftest_failed )
     return GPG_ERR_SELFTEST_FAILED;
 
   if( keylen != 16 )
     return GPG_ERR_INV_KEYLEN;
 
   /* Load the key as four big-endian words. */
   for (i = 0; i < 4; i++)
     state_x[i] = buf_get_be32(key + 4 * i);
 
   /* First pass of the schedule yields the Km words ... */
   key_schedule( state_x, state_z, subkeys );
   for (i = 0; i < 16; i++)
     c->Km[i] = subkeys[i];
 
   /* ... the second pass yields the Kr amounts (low five bits only). */
   key_schedule( state_x, state_z, subkeys );
   for (i = 0; i < 16; i++)
     c->Kr[i] = subkeys[i] & 0x1f;
 
 #ifdef USE_ARM_ASM
   for (i = 0; i < 4; i++)
     {
       byte Kr_arm[4];
       int j;
 
       /* Convert rotate left to rotate right and add shift left
        * by 2.  */
       for (j = 0; j < 4; j++)
         Kr_arm[j] = ((32 - c->Kr[4 * i + j]) - 2) & 0x1f;
 
       /* Endian friendly store.  */
       c->Kr_arm_enc[i] = Kr_arm[0] |
                         (Kr_arm[1] << 8) |
                         (Kr_arm[2] << 16) |
                         (Kr_arm[3] << 24);
       c->Kr_arm_dec[i] = Kr_arm[3] |
                         (Kr_arm[2] << 8) |
                         (Kr_arm[1] << 16) |
                         (Kr_arm[0] << 24);
 
       wipememory(Kr_arm, sizeof(Kr_arm));
     }
 #endif
 
   /* Do not leave key-derived data on the stack. */
   wipememory(state_x, sizeof state_x);
   wipememory(state_z, sizeof state_z);
   wipememory(subkeys, sizeof subkeys);
 
 #undef xi
 #undef zi
   return GPG_ERR_NO_ERROR;
 }
 
 /* Cipher-spec setkey hook.  Runs the key setup on CONTEXT (an untyped
    pointer to a CAST5_context) and then fills BULK_OPS: the structure is
    zeroed and the three CAST5 bulk handlers are installed.  BULK_OPS is
    filled in regardless of the key-setup result, which is returned. */
 static gcry_err_code_t
 cast_setkey (void *context, const byte *key, unsigned keylen,
              cipher_bulk_ops_t *bulk_ops)
 {
   gcry_err_code_t rc;
 
   rc = do_cast_setkey ((CAST5_context *) context, key, keylen);
 
   /* Start from an all-zero table, then install the bulk handlers.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
   bulk_ops->ctr_enc = _gcry_cast5_ctr_enc;
   bulk_ops->cbc_dec = _gcry_cast5_cbc_dec;
   bulk_ops->cfb_dec = _gcry_cast5_cfb_dec;
 
   return rc;
 }
 
 
 /* Cipher specification object for CAST5.  It ties the algorithm id, the
    name "CAST5", CAST5_BLOCKSIZE, the value 128 and the context size to the
    setkey / encrypt / decrypt entry points defined in this file.
    NOTE(review): the meaning of the remaining positional fields is set by
    gcry_cipher_spec_t, which is declared elsewhere -- 128 is presumably the
    key length in bits; confirm against that declaration. */
 gcry_cipher_spec_t _gcry_cipher_spec_cast5 =
   {
     GCRY_CIPHER_CAST5, {0, 0},
     "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context),
     cast_setkey, encrypt_block, decrypt_block
   };
diff --git a/cipher/crc-armv8-ce.c b/cipher/crc-armv8-ce.c
index 17e55548..e8c314d4 100644
--- a/cipher/crc-armv8-ce.c
+++ b/cipher/crc-armv8-ce.c
@@ -1,229 +1,229 @@
 /* crc-armv8-ce.c - ARMv8-CE PMULL accelerated CRC implementation
  * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 
 
 #if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(__AARCH64EL__) && \
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
 
 
 #define ALIGNED_16 __attribute__ ((aligned (16)))
 
 
 /* Wrapper around a single u16, declared packed, aligned (1) and may_alias.
    The code below casts a byte pointer to this type to read 16 bits from an
    arbitrary position in the input buffer. */
 struct u16_unaligned_s
 {
   u16 a;
 } __attribute__((packed, aligned (1), may_alias));
 
 /* Same as above for a single u32; used to read 32 bits from an arbitrary
    position in the input buffer. */
 struct u32_unaligned_s
 {
   u32 a;
 } __attribute__((packed, aligned (1), may_alias));
 
 
 /* Constants structure for generic reflected/non-reflected CRC32 PMULL
  * functions.  A pointer to one of these is passed to every bulk and
  * reduction routine declared in this file. */
 struct crc32_consts_s
 {
   /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
   u64 k[6];
   /* my_p: { floor(x^64 / P(x)), P(x) } */
   u64 my_p[2];
 };
 
 /* PMULL constants for CRC32 and CRC32RFC1510.  All values are stored
  * bit-reversed (see the reverse_33bits notes below); this table is the one
  * handed to the reflected ("crc32r") routines by
  * _gcry_crc32_armv8_ce_pmull. */
 static const struct crc32_consts_s crc32_consts ALIGNED_16 =
 {
   { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
     U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
     U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
     U64_C(0x163cd6124), 0                   /* y = 2 */
   },
   { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
     U64_C(0x1f7011641), U64_C(0x1db710641)
   }
 };
 
 /* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸).  This
  * table is the one handed to the non-reflected ("crc32") routines by
  * _gcry_crc24rfc2440_armv8_ce_pmull. */
 static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
 {
   { /* k[6] = x^(32*y) mod P(x) << 32 */
     U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
     U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
     U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
   },
   { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
     U64_C(0x1f845fe24), U64_C(0x1864cfb00)
   }
 };
 
 
 /* Routines defined outside this file (NOTE(review): presumably in the
  * accompanying ARMv8-CE assembly source -- confirm).
  *
  * The "crc32r" pair is used here with crc32_consts, the "crc32" pair with
  * crc24rfc2440_consts.  The *_reduction_4 functions take a data word plus
  * the current CRC and return the new CRC; the *_bulk functions update *PCRC
  * and are only called from this file with INLEN >= 16. */
 u32 _gcry_crc32r_armv8_ce_reduction_4 (u32 data, u32 crc,
 				       const struct crc32_consts_s *consts);
 void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
                                  const struct crc32_consts_s *consts);
 
 u32 _gcry_crc32_armv8_ce_reduction_4 (u32 data, u32 crc,
 				      const struct crc32_consts_s *consts);
 void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
                                 const struct crc32_consts_s *consts);
 
 
 /* Update *PCRC over INLEN bytes at INBUF using the reflected reduction
    routine and the constants in CONSTS.  The input is consumed four bytes at
    a time; a remainder of one to three bytes is folded in with a final
    reduction.  The caller in this file uses it for inputs shorter than 16
    bytes. */
 static inline void
 crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 		     const struct crc32_consts_s *consts)
 {
   u32 crc = *pcrc;
   u32 data;
 
   /* Whole 32-bit words. */
   for (; inlen >= 4; inlen -= 4, inbuf += 4)
     {
       data = ((const struct u32_unaligned_s *)inbuf)->a;
       data ^= crc;
 
       crc = _gcry_crc32r_armv8_ce_reduction_4 (data, 0, consts);
     }
 
   /* Remainder: the new bytes go to the top of DATA, and the part of the
      old CRC that they do not cover is passed along separately. */
   if (inlen == 1)
     {
       data = inbuf[0];
       data ^= crc;
       data <<= 24;
       crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc >> 8, consts);
     }
   else if (inlen == 2)
     {
       data = ((const struct u16_unaligned_s *)inbuf)->a;
       data ^= crc;
       data <<= 16;
       crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc >> 16, consts);
     }
   else if (inlen == 3)
     {
       data = ((const struct u16_unaligned_s *)inbuf)->a;
       data |= inbuf[2] << 16;
       data ^= crc;
       data <<= 8;
       crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc >> 24, consts);
     }
 
   *pcrc = crc;
 }
 
 /* Update *PCRC over INLEN bytes at INBUF using the non-reflected reduction
    routine and the constants in CONSTS.  The input is consumed four bytes at
    a time, byte-swapping each word before the reduction; a remainder of one
    to three bytes is folded in with a final reduction.  The caller in this
    file uses it for inputs shorter than 16 bytes. */
 static inline void
 crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 		    const struct crc32_consts_s *consts)
 {
   u32 crc = *pcrc;
   u32 data;
 
   /* Whole 32-bit words. */
   for (; inlen >= 4; inlen -= 4, inbuf += 4)
     {
       data = ((const struct u32_unaligned_s *)inbuf)->a;
       data ^= crc;
       data = _gcry_bswap32(data);
 
       crc = _gcry_crc32_armv8_ce_reduction_4 (data, 0, consts);
     }
 
   /* Remainder of one to three bytes. */
   if (inlen == 1)
     {
       data = inbuf[0];
       data ^= crc;
       data = data & 0xffU;
       crc = _gcry_bswap32(crc >> 8);
       crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
     }
   else if (inlen == 2)
     {
       data = ((const struct u16_unaligned_s *)inbuf)->a;
       data ^= crc;
       data = _gcry_bswap32(data << 16);
       crc = _gcry_bswap32(crc >> 16);
       crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
     }
   else if (inlen == 3)
     {
       data = ((const struct u16_unaligned_s *)inbuf)->a;
       data |= inbuf[2] << 16;
       data ^= crc;
       data = _gcry_bswap32(data << 8);
       crc = crc & 0xff000000U;
       crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
     }
 
   *pcrc = crc;
 }
 
 /* Update the CRC32 value at PCRC with INLEN bytes from INBUF.  Empty input
    leaves *PCRC untouched; inputs of 16 bytes or more go to the external
    bulk routine, anything shorter to the short-input helper.  Both use the
    reflected routines with crc32_consts. */
 void
 _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   const struct crc32_consts_s *consts = &crc32_consts;
 
   if (inlen == 0)
     return;
 
   if (inlen < 16)
     crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
   else
     _gcry_crc32r_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
 }
 
 /* Update the CRC24 (RFC 2440) value at PCRC with INLEN bytes from INBUF.
    Empty input leaves *PCRC untouched; inputs of 16 bytes or more go to the
    external bulk routine, anything shorter to the short-input helper.  Both
    use the non-reflected routines with crc24rfc2440_consts.
    Note: *pcrc in input endian. */
 void
 _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   const struct crc32_consts_s *consts = &crc24rfc2440_consts;
 
   if (inlen == 0)
     return;
 
   if (inlen < 16)
     crc32_less_than_16 (pcrc, inbuf, inlen, consts);
   else
     _gcry_crc32_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
 }
 
 #endif
diff --git a/cipher/crc-intel-pclmul.c b/cipher/crc-intel-pclmul.c
index 8c8b1915..825dee2a 100644
--- a/cipher/crc-intel-pclmul.c
+++ b/cipher/crc-intel-pclmul.c
@@ -1,939 +1,939 @@
 /* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
  * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 
 
 #if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \
     __GNUC__ >= 4 &&                                                   \
     ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 
 
 #if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
 /* Prevent compiler from issuing SSE instructions between asm blocks. */
 #  pragma GCC target("no-sse")
 #endif
 #if __clang__
 #  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function)
 #endif
 
 
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
 
 #define ASM_FUNC_ATTR        NO_INSTRUMENT_FUNCTION
 #define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
 
 
 #define ALIGNED_16 __attribute__ ((aligned (16)))
 
 
 /* Wrapper for reading a 16-bit value through a byte pointer.  The
  * short-input paths below cast INBUF to this type and read member `a';
  * the packed, aligned (1) and may_alias attributes are what make that
  * access acceptable at any address and for any underlying object type. */
 struct u16_unaligned_s
 {
   u16 a;
 } __attribute__((packed, aligned (1), may_alias));
 
 
 /* Constants structure for generic reflected/non-reflected CRC32 CLMUL
  * functions.
  *
  * The functions in this file read the members as 16-byte pairs:
  * k[0..1] ("k1k2") and k[2..3] ("k3k4") with movdqa, k[4..5] ("k5") as a
  * pclmulqdq memory operand, and my_p[0..1] with movdqa.  Instances are
  * therefore declared ALIGNED_16. */
 struct crc32_consts_s
 {
   /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
   u64 k[6];
   /* my_p: { floor(x^64 / P(x)), P(x) } */
   u64 my_p[2];
 };
 
 
 /* CLMUL constants for CRC32 and CRC32RFC1510.
  * Passed to the reflected helpers by _gcry_crc32_intel_pclmul. */
 static const struct crc32_consts_s crc32_consts ALIGNED_16 =
 {
   { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
     U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
     U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
     U64_C(0x163cd6124), 0                   /* y = 2 */
   },
   { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
     U64_C(0x1f7011641), U64_C(0x1db710641)
   }
 };
 
 /* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸).
  * Passed to the non-reflected helpers by _gcry_crc24rfc2440_intel_pclmul.
  * Unlike crc32_consts, the k[] values are stored in the upper 32 bits of
  * each 64-bit element (note the `<< 32'). */
 static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
 {
   { /* k[6] = x^(32*y) mod P(x) << 32*/
     U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
     U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
     U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
   },
   { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
     U64_C(0x1f845fe24), U64_C(0x1864cfb00)
   }
 };
 
 /* Common constants for CRC32 algorithms. */
 
 /* pshufb control table for the reflected code.  It is read with 16-byte
  * loads starting at a variable byte offset.  A control byte of 0xff has
  * its top bit set, which makes pshufb produce a zero byte, so:
  *   - a window at offset N (1..15) moves source bytes 0..N-1 to the top
  *     N byte positions and zeroes the rest;
  *   - a window at offset N + 16 moves source bytes N..15 down to
  *     positions 0..15-N and zeroes the rest. */
 static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 =
   {
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   };
 /* Same layout as crc32_refl_shuf_shift, but the middle row lists the byte
  * indices in descending order, so a window taken from this table also
  * reverses the byte order of what it keeps.  Used by the non-reflected
  * code. */
 static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 =
   {
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
     0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   };
 /* Middle row of crc32_shuf_shift: a full 16-byte byte-order reversal.
  * Offset 16 of an ALIGNED_16 table, so it may be used as an aligned
  * memory operand. */
 static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16];
 /* 16 zero bytes followed by 16 0xff bytes.  A 16-byte window starting at
  * offset N (1..15) has its top N bytes set and the low 16-N bytes clear;
  * the partial-fold code ANDs it with the last 16 input bytes. */
 static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 =
   {
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   };
 /* pshufb controls indexed by (inlen - 9), for inputs of 9..15 bytes.
  * Applied to a register whose low qword holds the first 8 input bytes and
  * whose high qword holds the last 8 input bytes (the two overlap): keeps
  * bytes 0..7, appends the trailing inlen-8 bytes of the high qword and
  * zeroes the remaining positions. */
 static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 =
   {
     { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */
     { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) },
     { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) },
     { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) },
     { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) },
     { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) },
     { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */
   };
 /* pshufb controls indexed by (inlen - 5), for inputs of 5..7 bytes.
  * Applied to a register whose dword 0 holds the first 4 input bytes and
  * whose dword 1 holds the last 4 input bytes (the two overlap): keeps
  * bytes 0..3, appends the trailing inlen-4 bytes of dword 1 and zeroes
  * the remaining positions. */
 static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 =
   {
     { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */
     { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) },
     { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */
   };
 
 /* PCLMUL functions for reflected CRC32. */
 /* Bulk update of a reflected CRC: folds the INLEN bytes at INBUF into the
  * CRC value at *PCRC with carry-less multiplications, using the constants
  * in CONSTS, and stores the new CRC back to *PCRC.
  *
  * The first block is loaded with an unconditional 16-byte read, so INLEN
  * must be at least 16; the entry point in this file only calls this
  * function in that case.
  *
  * State is passed from one asm statement to the next in fixed XMM
  * registers that are not declared as operands or clobbers (the file is
  * compiled with the "no-sse" target so the compiler does not touch them
  * in between).  The order of the asm statements must not be changed.
  * Register roles:
  *   XMM0        fold accumulator (XMM0..XMM3 while folding by 4)
  *   XMM4        k[0..1] during fold-by-4; scratch elsewhere
  *   XMM5        my_p[0..1], used by the final Barrett reduction
  *   XMM6        k[2..3], used by fold-by-1 and the final fold
  *   XMM1..XMM3  scratch outside fold-by-4 */
 static ASM_FUNC_ATTR_INLINE void
 crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 		      const struct crc32_consts_s *consts)
 {
   if (inlen >= 8 * 16)
     {
       /* Load the first four blocks into XMM0..XMM3 and XOR the incoming
        * CRC into the low 32 bits of the first one. */
       asm volatile ("movd %[crc], %%xmm4\n\t"
 		    "movdqu %[inbuf_0], %%xmm0\n\t"
 		    "movdqu %[inbuf_1], %%xmm1\n\t"
 		    "movdqu %[inbuf_2], %%xmm2\n\t"
 		    "movdqu %[inbuf_3], %%xmm3\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 		    :
 		    : [inbuf_0] "m" (inbuf[0 * 16]),
 		      [inbuf_1] "m" (inbuf[1 * 16]),
 		      [inbuf_2] "m" (inbuf[2 * 16]),
 		      [inbuf_3] "m" (inbuf[3 * 16]),
 		      [crc] "m" (*pcrc)
 		    );
 
       inbuf += 4 * 16;
       inlen -= 4 * 16;
 
       /* XMM4 = k[0..1], the fold-by-4 multipliers. */
       asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
 		    :
 		    : [k1k2] "m" (consts->k[1 - 1])
 		    );
 
       /* Fold by 4. */
       /* At least 64 bytes remain here, so the loop body runs at least
        * once.  Each accumulator is multiplied by both halves of XMM4 and
        * XORed with the next input block of its lane. */
       while (inlen >= 4 * 16)
 	{
 	  asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
 			"movdqa %%xmm0, %%xmm6\n\t"
 			"pclmulqdq $0x00, %%xmm4, %%xmm0\n\t"
 			"pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm0\n\t"
 			"pxor %%xmm6, %%xmm0\n\t"
 
 			"movdqu %[inbuf_1], %%xmm5\n\t"
 			"movdqa %%xmm1, %%xmm6\n\t"
 			"pclmulqdq $0x00, %%xmm4, %%xmm1\n\t"
 			"pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm1\n\t"
 			"pxor %%xmm6, %%xmm1\n\t"
 
 			"movdqu %[inbuf_2], %%xmm5\n\t"
 			"movdqa %%xmm2, %%xmm6\n\t"
 			"pclmulqdq $0x00, %%xmm4, %%xmm2\n\t"
 			"pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm2\n\t"
 			"pxor %%xmm6, %%xmm2\n\t"
 
 			"movdqu %[inbuf_3], %%xmm5\n\t"
 			"movdqa %%xmm3, %%xmm6\n\t"
 			"pclmulqdq $0x00, %%xmm4, %%xmm3\n\t"
 			"pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm3\n\t"
 			"pxor %%xmm6, %%xmm3\n\t"
 			:
 			: [inbuf_0] "m" (inbuf[0 * 16]),
 			  [inbuf_1] "m" (inbuf[1 * 16]),
 			  [inbuf_2] "m" (inbuf[2 * 16]),
 			  [inbuf_3] "m" (inbuf[3 * 16])
 			);
 
 	  inbuf += 4 * 16;
 	  inlen -= 4 * 16;
 	}
 
       /* XMM5/XMM6 were scratch inside the loop; load their long-lived
        * values (my_p and k[2..3]) now. */
       asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [k3k4] "m" (consts->k[3 - 1]),
 		      [my_p] "m" (consts->my_p[0])
 		    );
 
       /* Fold 4 to 1. */
 
       /* XMM0 is folded with XMM6 and combined with XMM1, then XMM2, then
        * XMM3, leaving a single accumulator in XMM0. */
       asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 
 		    "movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm2, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 
 		    "movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm3, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 		    :
 		    :
 		    );
     }
   else
     {
       /* Fewer than 8 blocks: start directly with a single accumulator.
        * XMM0 = first block XOR incoming CRC, XMM6 = k[2..3],
        * XMM5 = my_p. */
       asm volatile ("movd %[crc], %%xmm1\n\t"
 		    "movdqu %[inbuf], %%xmm0\n\t"
 		    "movdqa %[k3k4], %%xmm6\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [inbuf] "m" (*inbuf),
 		      [crc] "m" (*pcrc),
 		      [k3k4] "m" (consts->k[3 - 1]),
 		      [my_p] "m" (consts->my_p[0])
 		    );
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Fold by 1. */
   if (inlen >= 16)
     {
       while (inlen >= 16)
 	{
 	  /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
 	  asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
 			"movdqa %%xmm0, %%xmm1\n\t"
 			"pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
 			"pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
 			"pxor %%xmm2, %%xmm0\n\t"
 			"pxor %%xmm1, %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf)
 			);
 
 	  inbuf += 16;
 	  inlen -= 16;
 	}
     }
 
   /* Partial fold. */
   /* Reached with 0..15 bytes left.  The [inbuf] operand below addresses
    * the 16 bytes that END at the end of the input, i.e. it re-reads
    * 16 - inlen bytes that were already consumed; [mask] keeps only the
    * trailing inlen bytes of that load.  The accumulator is split with the
    * two shuffle windows from crc32_refl_shuf_shift and the part shifted
    * out is ORed into the masked input before the fold. */
   if (inlen)
     {
       /* Load last input and add padding zeros. */
       asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t"
 		    "movdqu %[shl_shuf], %%xmm4\n\t"
 		    "movdqu %[mask], %%xmm2\n\t"
 
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pshufb %%xmm4, %%xmm0\n\t"
 		    "movdqu %[inbuf], %%xmm4\n\t"
 		    "pshufb %%xmm3, %%xmm1\n\t"
 		    "pand %%xmm4, %%xmm2\n\t"
 		    "por %%xmm1, %%xmm2\n\t"
 
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
 		    "pxor %%xmm2, %%xmm0\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    :
 		    : [inbuf] "m" (*(inbuf - 16 + inlen)),
 		      [mask] "m" (crc32_partial_fold_input_mask[inlen]),
 		      [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]),
 		      [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16])
 		    );
 
       inbuf += inlen;
       inlen -= inlen;
     }
 
   /* Final fold. */
   /* Reduces the 128-bit accumulator in XMM0 to the 32-bit CRC using
    * XMM6 (k[2..3]), k[4..5] from memory and XMM5 (my_p), then writes the
    * result to *pcrc. */
   asm volatile (/* reduce 128-bits to 96-bits */
 		"movdqa %%xmm0, %%xmm1\n\t"
 		"pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
 		"psrldq $8, %%xmm1\n\t"
 		"pxor %%xmm1, %%xmm0\n\t"
 
 		/* reduce 96-bits to 64-bits */
 		"pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
 		"pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
 		"pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
 		"pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
 
 		/* barrett reduction */
 		"pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
 		"pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
 		"pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
 		"pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
 		"pxor %%xmm1, %%xmm0\n\t"
 
 		/* store CRC */
 		"pextrd $2, %%xmm0, %[out]\n\t"
 		: [out] "=m" (*pcrc)
 		: [k5] "m" (consts->k[5 - 1])
 	        );
 }
 
 /* Reflected CRC update for short inputs: folds the INLEN bytes at INBUF
  * into the CRC value at *PCRC using the constants in CONSTS and stores
  * the new CRC back to *PCRC.
  *
  * There are three paths: INLEN < 4, INLEN == 4, and a general path whose
  * table lookups (inlen - 5, inlen - 9) are only valid for 5..15 bytes.
  * The entry point in this file calls this function with
  * 1 <= INLEN <= 15.
  *
  * As elsewhere in this file, values are passed between asm statements in
  * undeclared XMM registers (XMM5 = my_p, XMM6 = k[2..3], XMM4 = shuffle
  * control, XMM1 = incoming CRC, XMM0 = data), so the statement order
  * must be preserved. */
 static ASM_FUNC_ATTR_INLINE void
 crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 			      const struct crc32_consts_s *consts)
 {
   if (inlen < 4)
     {
       u32 crc = *pcrc;
       u32 data;
 
       /* XMM5 = my_p for the Barrett reduction below. */
       asm volatile ("movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [my_p] "m" (consts->my_p[0])
 		    );
 
       /* In each case below `data' becomes the input bytes XORed with the
        * low bytes of the CRC and moved to the top of the 32-bit word,
        * while `crc' keeps the CRC bytes that were not XORed with input,
        * shifted down. */
       if (inlen == 1)
 	{
 	  data = inbuf[0];
 	  data ^= crc;
 	  data <<= 24;
 	  crc >>= 8;
 	}
       else if (inlen == 2)
 	{
 	  data = ((const struct u16_unaligned_s *)inbuf)->a;
 	  data ^= crc;
 	  data <<= 16;
 	  crc >>= 16;
 	}
       else
 	{
 	  /* Three bytes: a 16-bit load plus the third byte. */
 	  data = ((const struct u16_unaligned_s *)inbuf)->a;
 	  data |= inbuf[2] << 16;
 	  data ^= crc;
 	  data <<= 8;
 	  crc >>= 24;
 	}
 
       /* Barrett reduction */
       asm volatile ("movd %[in], %%xmm0\n\t"
 		    "movd %[crc], %%xmm1\n\t"
 
 		    "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    "psllq $32, %%xmm1\n\t"
 		    "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
 		    "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    "pxor %%xmm1, %%xmm0\n\t"
 
 		    "pextrd $1, %%xmm0, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    : [in] "rm" (data),
 		      [crc] "rm" (crc)
 		    );
     }
   else if (inlen == 4)
     {
       /* Exactly one 32-bit word: XOR it with the CRC and reduce. */
       /* Barrett reduction */
       asm volatile ("movd %[crc], %%xmm1\n\t"
 		    "movd %[in], %%xmm0\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 
 		    "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
 		    "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 
 		    "pextrd $1, %%xmm0, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    : [in] "m" (*inbuf),
 		      [crc] "m" (*pcrc),
 		      [my_p] "m" (consts->my_p[0])
 		    );
     }
   else
     {
       /* 5..15 bytes.  XMM4 = shuffle window at offset inlen (moves the
        * low inlen bytes to the top of the register), XMM1 = incoming
        * CRC, XMM5 = my_p, XMM6 = k[2..3]. */
       asm volatile ("movdqu %[shuf], %%xmm4\n\t"
 		    "movd %[crc], %%xmm1\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    "movdqa %[k3k4], %%xmm6\n\t"
 		    :
 		    : [shuf] "m" (crc32_refl_shuf_shift[inlen]),
 		      [crc] "m" (*pcrc),
 		      [my_p] "m" (consts->my_p[0]),
 		      [k3k4] "m" (consts->k[3 - 1])
 		    );
 
       /* Gather the input into XMM0 without reading past its end: load
        * the head and the (overlapping) tail and merge them with a
        * pshufb control selected by the length. */
       if (inlen >= 8)
 	{
 	  asm volatile ("movq %[inbuf], %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf)
 			);
 	  if (inlen > 8)
 	    {
 	      /* Last 8 bytes go to the high qword, then the merge shuffle
 	       * drops the overlapping bytes. */
 	      asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
 			    "movq %[inbuf_tail], %%xmm2\n\t"
 			    "punpcklqdq %%xmm2, %%xmm0\n\t"
 			    "pshufb %[merge_shuf], %%xmm0\n\t"
 			    :
 			    : [inbuf_tail] "m" (inbuf[inlen - 8]),
 			      [merge_shuf] "m"
 				(*crc32_merge9to15_shuf[inlen - 9])
 			    );
 	    }
 	}
       else
 	{
 	  /* 5..7 bytes: first 4 bytes in dword 0, last 4 bytes in dword 1,
 	   * then merge. */
 	  asm volatile ("movd %[inbuf], %%xmm0\n\t"
 			"pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
 			"pshufb %[merge_shuf], %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf),
 			  [inbuf_tail] "m" (inbuf[inlen - 4]),
 			  [merge_shuf] "m"
 			    (*crc32_merge5to7_shuf[inlen - 5])
 			);
 	}
 
       /* Final fold. */
       /* XOR in the CRC, apply the XMM4 shuffle, then run the same
        * 128 -> 96 -> 64 -> 32 bit reduction as crc32_reflected_bulk. */
       asm volatile ("pxor %%xmm1, %%xmm0\n\t"
 		    "pshufb %%xmm4, %%xmm0\n\t"
 
 		    /* reduce 128-bits to 96-bits */
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
 		    "psrldq $8, %%xmm1\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */
 
 		    /* reduce 96-bits to 64-bits */
 		    "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
 		    "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
 		    "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
 		    "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
 
 		    /* barrett reduction */
 		    "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
 		    "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
 		    "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
 		    "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
 		    "pxor %%xmm1, %%xmm0\n\t"
 
 		    /* store CRC */
 		    "pextrd $2, %%xmm0, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    : [k5] "m" (consts->k[5 - 1])
 		    );
     }
 }
 
 /* PCLMUL functions for non-reflected CRC32. */
 /* Bulk update of a non-reflected CRC: folds the INLEN bytes at INBUF into
  * the CRC value at *PCRC with carry-less multiplications, using the
  * constants in CONSTS, and stores the new CRC back to *PCRC.
  *
  * Same structure as crc32_reflected_bulk, except that every loaded block
  * is byte-reversed with the control in XMM7 before it is used, and the
  * result is byte-swapped again before it is stored (*pcrc stays in input
  * byte order).
  *
  * The first block is loaded with an unconditional 16-byte read, so INLEN
  * must be at least 16; the entry point in this file only calls this
  * function in that case.
  *
  * State is passed between asm statements in undeclared XMM registers, so
  * the statement order must be preserved.  Register roles:
  *   XMM0   fold accumulator (XMM0..XMM3 while folding by 4)
  *   XMM4   k[0..1] during fold-by-4; scratch elsewhere
  *   XMM5   my_p[0..1]
  *   XMM6   k[2..3]
  *   XMM7   byte-reversal shuffle control (crc32_bswap_shuf) */
 static ASM_FUNC_ATTR_INLINE void
 crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 	    const struct crc32_consts_s *consts)
 {
   /* XMM7 = byte-reversal control, kept for the whole function. */
   asm volatile ("movdqa %[bswap], %%xmm7\n\t"
 		:
 		: [bswap] "m" (*crc32_bswap_shuf)
 		);
 
   if (inlen >= 8 * 16)
     {
       /* Load the first four blocks, XOR the incoming CRC into the first
        * one (before the byte reversal), then byte-reverse all four. */
       asm volatile ("movd %[crc], %%xmm4\n\t"
 		    "movdqu %[inbuf_0], %%xmm0\n\t"
 		    "movdqu %[inbuf_1], %%xmm1\n\t"
 		    "movdqu %[inbuf_2], %%xmm2\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 		    "movdqu %[inbuf_3], %%xmm3\n\t"
 		    "pshufb %%xmm7, %%xmm0\n\t"
 		    "pshufb %%xmm7, %%xmm1\n\t"
 		    "pshufb %%xmm7, %%xmm2\n\t"
 		    "pshufb %%xmm7, %%xmm3\n\t"
 		    :
 		    : [inbuf_0] "m" (inbuf[0 * 16]),
 		      [inbuf_1] "m" (inbuf[1 * 16]),
 		      [inbuf_2] "m" (inbuf[2 * 16]),
 		      [inbuf_3] "m" (inbuf[3 * 16]),
 		      [crc] "m" (*pcrc)
 		    );
 
       inbuf += 4 * 16;
       inlen -= 4 * 16;
 
       /* XMM4 = k[0..1], the fold-by-4 multipliers. */
       asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
 		    :
 		    : [k1k2] "m" (consts->k[1 - 1])
 		    );
 
       /* Fold by 4. */
       /* At least 64 bytes remain here, so the loop body runs at least
        * once. */
       while (inlen >= 4 * 16)
 	{
 	  asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
 			"movdqa %%xmm0, %%xmm6\n\t"
 			"pshufb %%xmm7, %%xmm5\n\t"
 			"pclmulqdq $0x01, %%xmm4, %%xmm0\n\t"
 			"pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm0\n\t"
 			"pxor %%xmm6, %%xmm0\n\t"
 
 			"movdqu %[inbuf_1], %%xmm5\n\t"
 			"movdqa %%xmm1, %%xmm6\n\t"
 			"pshufb %%xmm7, %%xmm5\n\t"
 			"pclmulqdq $0x01, %%xmm4, %%xmm1\n\t"
 			"pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm1\n\t"
 			"pxor %%xmm6, %%xmm1\n\t"
 
 			"movdqu %[inbuf_2], %%xmm5\n\t"
 			"movdqa %%xmm2, %%xmm6\n\t"
 			"pshufb %%xmm7, %%xmm5\n\t"
 			"pclmulqdq $0x01, %%xmm4, %%xmm2\n\t"
 			"pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm2\n\t"
 			"pxor %%xmm6, %%xmm2\n\t"
 
 			"movdqu %[inbuf_3], %%xmm5\n\t"
 			"movdqa %%xmm3, %%xmm6\n\t"
 			"pshufb %%xmm7, %%xmm5\n\t"
 			"pclmulqdq $0x01, %%xmm4, %%xmm3\n\t"
 			"pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
 			"pxor %%xmm5, %%xmm3\n\t"
 			"pxor %%xmm6, %%xmm3\n\t"
 			:
 			: [inbuf_0] "m" (inbuf[0 * 16]),
 			  [inbuf_1] "m" (inbuf[1 * 16]),
 			  [inbuf_2] "m" (inbuf[2 * 16]),
 			  [inbuf_3] "m" (inbuf[3 * 16])
 			);
 
 	  inbuf += 4 * 16;
 	  inlen -= 4 * 16;
 	}
 
       /* XMM5/XMM6 were scratch inside the loop; load their long-lived
        * values (my_p and k[2..3]) now. */
       asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [k3k4] "m" (consts->k[3 - 1]),
 		      [my_p] "m" (consts->my_p[0])
 		    );
 
       /* Fold 4 to 1. */
 
       /* XMM0 is folded with XMM6 and combined with XMM1, then XMM2, then
        * XMM3, leaving a single accumulator in XMM0. */
       asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 
 		    "movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm2, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 
 		    "movdqa %%xmm0, %%xmm4\n\t"
 		    "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
 		    "pxor %%xmm3, %%xmm0\n\t"
 		    "pxor %%xmm4, %%xmm0\n\t"
 		    :
 		    :
 		    );
     }
   else
     {
       /* Fewer than 8 blocks: start directly with a single accumulator.
        * XMM0 = byte-reversed (first block XOR incoming CRC),
        * XMM6 = k[2..3], XMM5 = my_p. */
       asm volatile ("movd %[crc], %%xmm1\n\t"
 		    "movdqu %[inbuf], %%xmm0\n\t"
 		    "movdqa %[k3k4], %%xmm6\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    "pshufb %%xmm7, %%xmm0\n\t"
 		    :
 		    : [inbuf] "m" (*inbuf),
 		      [crc] "m" (*pcrc),
 		      [k3k4] "m" (consts->k[3 - 1]),
 		      [my_p] "m" (consts->my_p[0])
 		    );
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Fold by 1. */
   if (inlen >= 16)
     {
       while (inlen >= 16)
 	{
 	  /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
 	  asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
 			"movdqa %%xmm0, %%xmm1\n\t"
 			"pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
 			"pshufb %%xmm7, %%xmm2\n\t"
 			"pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
 			"pxor %%xmm2, %%xmm0\n\t"
 			"pxor %%xmm1, %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf)
 			);
 
 	  inbuf += 16;
 	  inlen -= 16;
 	}
     }
 
   /* Partial fold. */
   /* Reached with 0..15 bytes left.  The [inbuf] operand below addresses
    * the 16 bytes that END at the end of the input, i.e. it re-reads
    * 16 - inlen bytes that were already consumed; [mask] keeps only the
    * trailing inlen bytes of that load.  Note that the two shuffle
    * windows come from different tables here (crc32_refl_shuf_shift for
    * shl_shuf, crc32_shuf_shift for shr_shuf), and the merged block is
    * byte-reversed with XMM7 before the fold. */
   if (inlen)
     {
       /* Load last input and add padding zeros. */
       asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t"
 		    "movdqu %[shr_shuf], %%xmm3\n\t"
 		    "movdqu %[mask], %%xmm2\n\t"
 
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pshufb %%xmm4, %%xmm0\n\t"
 		    "movdqu %[inbuf], %%xmm4\n\t"
 		    "pshufb %%xmm3, %%xmm1\n\t"
 		    "pand %%xmm4, %%xmm2\n\t"
 		    "por %%xmm1, %%xmm2\n\t"
 
 		    "pshufb %%xmm7, %%xmm2\n\t"
 
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
 		    "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
 		    "pxor %%xmm2, %%xmm0\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 		    :
 		    : [inbuf] "m" (*(inbuf - 16 + inlen)),
 		      [mask] "m" (crc32_partial_fold_input_mask[inlen]),
 		      [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]),
 		      [shr_shuf] "m" (crc32_shuf_shift[inlen + 16])
 		    );
 
       inbuf += inlen;
       inlen -= inlen;
     }
 
   /* Final fold. */
   /* Reduces the 128-bit accumulator in XMM0 to the 32-bit CRC using
    * XMM6 (k[2..3]), k[4..5] from memory and XMM5 (my_p).  The result is
    * moved through EAX (declared as a clobber) for the byte swap. */
   asm volatile (/* reduce 128-bits to 96-bits */
 		"movdqa %%xmm0, %%xmm1\n\t"
 		"pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
 		"pslldq $8, %%xmm1\n\t"
 		"pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
 
 		/* reduce 96-bits to 64-bits */
 		"pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
 		"pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
 		"pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
 		"pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */
 
 		/* barrett reduction */
 		"pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
 		"pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
 		"psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
 		"pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
 		"pxor %%xmm1, %%xmm0\n\t"
 
 		/* store CRC in input endian */
 		"movd %%xmm0, %%eax\n\t"
 		"bswapl %%eax\n\t"
 		"movl %%eax, %[out]\n\t"
 		: [out] "=m" (*pcrc)
 		: [k5] "m" (consts->k[5 - 1])
 		: "eax" );
 }
 
 /* Non-reflected CRC update for short inputs: folds the INLEN bytes at
  * INBUF into the CRC value at *PCRC using the constants in CONSTS and
  * stores the new CRC back to *PCRC (in input byte order).
  *
  * There are three paths: INLEN < 4, INLEN == 4, and a general path whose
  * table lookups (inlen - 5, inlen - 9) are only valid for 5..15 bytes.
  * The entry point in this file calls this function with
  * 1 <= INLEN <= 15.
  *
  * As elsewhere in this file, values are passed between asm statements in
  * undeclared XMM registers (XMM5 = my_p, XMM6 = k[2..3], XMM7 = shuffle
  * control, XMM1 = incoming CRC, XMM0 = data), so the statement order
  * must be preserved.  EAX is used for the final byte swap and is
  * declared as a clobber where it is used. */
 static ASM_FUNC_ATTR_INLINE void
 crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 		    const struct crc32_consts_s *consts)
 {
   if (inlen < 4)
     {
       u32 crc = *pcrc;
       u32 data;
 
       /* XMM5 = my_p for the Barrett reduction below. */
       asm volatile ("movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [my_p] "m" (consts->my_p[0])
 		    );
 
       /* In each case below `data' becomes the input bytes XORed with the
        * low bytes of the CRC, shifted to the top of the word and then
        * byte-swapped; `crc' keeps the CRC bytes that were not XORed with
        * input, shifted down and byte-swapped. */
       if (inlen == 1)
 	{
 	  data = inbuf[0];
 	  data ^= crc;
 	  data = _gcry_bswap32(data << 24);
 	  crc = _gcry_bswap32(crc >> 8);
 	}
       else if (inlen == 2)
 	{
 	  data = ((const struct u16_unaligned_s *)inbuf)->a;
 	  data ^= crc;
 	  data = _gcry_bswap32(data << 16);
 	  crc = _gcry_bswap32(crc >> 16);
 	}
       else
 	{
 	  /* Three bytes: a 16-bit load plus the third byte. */
 	  data = ((const struct u16_unaligned_s *)inbuf)->a;
 	  data |= inbuf[2] << 16;
 	  data ^= crc;
 	  data = _gcry_bswap32(data << 8);
 	  crc = _gcry_bswap32(crc >> 24);
 	}
 
       /* Barrett reduction */
       asm volatile ("movd %[in], %%xmm0\n\t"
 		    "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */
 		    "movd %[crc], %%xmm1\n\t"
 
 		    "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
 		    "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    "pxor %%xmm1, %%xmm0\n\t"
 
 		    /* store CRC in input endian */
 		    "movd %%xmm0, %%eax\n\t"
 		    "bswapl %%eax\n\t"
 		    "movl %%eax, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    : [in] "r" (data),
 		      [crc] "r" (crc)
 		    : "eax" );
     }
   else if (inlen == 4)
     {
       /* Exactly one 32-bit word.  The work is split over three asm
        * statements: load, reduce, store. */
       /* Barrett reduction */
       asm volatile ("movd %[crc], %%xmm0\n\t"
 		    "movd %[in], %%xmm1\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    :
 		    : [in] "m" (*inbuf),
 		      [crc] "m" (*pcrc),
 		      [my_p] "m" (consts->my_p[0])
 		    : "cc" );
 
       asm volatile ("pxor %%xmm1, %%xmm0\n\t"
 		    "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */
 
 		    "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
 		    "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    :
 		    : [bswap] "m" (*crc32_bswap_shuf)
 		    : "cc" );
 
       asm volatile (/* store CRC in input endian */
 		    "movd %%xmm0, %%eax\n\t"
 		    "bswapl %%eax\n\t"
 		    "movl %%eax, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    :
 		    : "eax", "cc" );
     }
   else
     {
       /* 5..15 bytes.  XMM7 = shuffle window taken from crc32_shuf_shift
        * at offset 32 - inlen, XMM1 = incoming CRC, XMM5 = my_p,
        * XMM6 = k[2..3]. */
       asm volatile ("movdqu %[shuf], %%xmm7\n\t"
 		    "movd %[crc], %%xmm1\n\t"
 		    "movdqa %[my_p], %%xmm5\n\t"
 		    "movdqa %[k3k4], %%xmm6\n\t"
 		    :
 		    : [shuf] "m" (crc32_shuf_shift[32 - inlen]),
 		      [crc] "m" (*pcrc),
 		      [my_p] "m" (consts->my_p[0]),
 		      [k3k4] "m" (consts->k[3 - 1])
 		    );
 
       /* Gather the input into XMM0 without reading past its end: load
        * the head and the (overlapping) tail and merge them with a
        * pshufb control selected by the length. */
       if (inlen >= 8)
 	{
 	  asm volatile ("movq %[inbuf], %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf)
 			);
 	  if (inlen > 8)
 	    {
 	      /* Last 8 bytes go to the high qword, then the merge shuffle
 	       * drops the overlapping bytes. */
 	      asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
 			    "movq %[inbuf_tail], %%xmm2\n\t"
 			    "punpcklqdq %%xmm2, %%xmm0\n\t"
 			    "pshufb %[merge_shuf], %%xmm0\n\t"
 			    :
 			    : [inbuf_tail] "m" (inbuf[inlen - 8]),
 			      [merge_shuf] "m"
 				(*crc32_merge9to15_shuf[inlen - 9])
 			    );
 	    }
 	}
       else
 	{
 	  /* 5..7 bytes: first 4 bytes in dword 0, last 4 bytes in dword 1,
 	   * then merge. */
 	  asm volatile ("movd %[inbuf], %%xmm0\n\t"
 			"pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
 			"pshufb %[merge_shuf], %%xmm0\n\t"
 			:
 			: [inbuf] "m" (*inbuf),
 			  [inbuf_tail] "m" (inbuf[inlen - 4]),
 			  [merge_shuf] "m"
 			    (*crc32_merge5to7_shuf[inlen - 5])
 			);
 	}
 
       /* Final fold. */
       /* XOR in the CRC, apply the XMM7 shuffle, then run the same
        * 128 -> 96 -> 64 -> 32 bit reduction as crc32_bulk. */
       asm volatile ("pxor %%xmm1, %%xmm0\n\t"
 		    "pshufb %%xmm7, %%xmm0\n\t"
 
 		    /* reduce 128-bits to 96-bits */
 		    "movdqa %%xmm0, %%xmm1\n\t"
 		    "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
 		    "pslldq $8, %%xmm1\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
 
 		    /* reduce 96-bits to 64-bits */
 		    "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
 		    "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
 		    "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
 		    "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */
 
 		    /* barrett reduction */
 		    "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
 		    "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
 		    "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
 		    "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
 		    "pxor %%xmm1, %%xmm0\n\t"
 
 		    /* store CRC in input endian */
 		    "movd %%xmm0, %%eax\n\t"
 		    "bswapl %%eax\n\t"
 		    "movl %%eax, %[out]\n\t"
 		    : [out] "=m" (*pcrc)
 		    : [k5] "m" (consts->k[5 - 1])
 		    : "eax" );
     }
 }
 
 /* Entry point for the PCLMUL CRC32 update: folds the INLEN bytes at INBUF
  * into the CRC value at *PCRC using the reflected helpers and the
  * crc32_consts table.  A zero INLEN leaves *PCRC unchanged.
  *
  * Inputs of 16 bytes or more go to crc32_reflected_bulk, shorter ones to
  * crc32_reflected_less_than_16.
  *
  * On 64-bit Windows builds XMM6 and XMM7 are saved to a stack buffer
  * before the helpers run and restored afterwards. */
 void ASM_FUNC_ATTR
 _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   const struct crc32_consts_s *consts = &crc32_consts;
 #if defined(__x86_64__) && defined(__WIN64__)
   char win64tmp[2 * 16];
 
   /* XMM6-XMM7 need to be restored after use. */
   asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
                 "movdqu %%xmm7, 1*16(%0)\n\t"
                 :
                 : "r" (win64tmp)
                 : "memory");
 #endif
 
   /* Nothing to do for an empty buffer.  No register has been modified
    * yet, so returning without the restore below is harmless. */
   if (!inlen)
     return;
 
   if (inlen >= 16)
     crc32_reflected_bulk(pcrc, inbuf, inlen, consts);
   else
     crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts);
 
 #if defined(__x86_64__) && defined(__WIN64__)
   /* Restore used registers. */
   asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
                "movdqu 1*16(%0), %%xmm7\n\t"
                :
                : "r" (win64tmp)
                : "memory");
 #endif
 }
 
 /* Entry point for the PCLMUL CRC24 (RFC 2440) update: folds the INLEN
  * bytes at INBUF into the CRC value at *PCRC using the non-reflected
  * helpers and the crc24rfc2440_consts table.  A zero INLEN leaves *PCRC
  * unchanged.
  *
  * Inputs of 16 bytes or more go to crc32_bulk, shorter ones to
  * crc32_less_than_16.
  *
  * On 64-bit Windows builds XMM6 and XMM7 are saved to a stack buffer
  * before the helpers run and restored afterwards. */
 void ASM_FUNC_ATTR
 _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   const struct crc32_consts_s *consts = &crc24rfc2440_consts;
 #if defined(__x86_64__) && defined(__WIN64__)
   char win64tmp[2 * 16];
 
   /* XMM6-XMM7 need to be restored after use. */
   asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
                 "movdqu %%xmm7, 1*16(%0)\n\t"
                 :
                 : "r" (win64tmp)
                 : "memory");
 #endif
 
   /* Nothing to do for an empty buffer.  No register has been modified
    * yet, so returning without the restore below is harmless. */
   if (!inlen)
     return;
 
   /* Note: *pcrc in input endian. */
 
   if (inlen >= 16)
     crc32_bulk(pcrc, inbuf, inlen, consts);
   else
     crc32_less_than_16(pcrc, inbuf, inlen, consts);
 
 #if defined(__x86_64__) && defined(__WIN64__)
   /* Restore used registers. */
   asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
                "movdqu 1*16(%0), %%xmm7\n\t"
                :
                : "r" (win64tmp)
                : "memory");
 #endif
 }
 
 #if __clang__
 #  pragma clang attribute pop
 #endif
 
 #endif /* USE_INTEL_PCLMUL */
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
index b9a40130..6eadccc8 100644
--- a/cipher/crc-ppc.c
+++ b/cipher/crc-ppc.c
@@ -1,656 +1,656 @@
 /* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation
  * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 
 
 #if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
     defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
     __GNUC__ >= 4
 
 #include <altivec.h>
 #include "bufhelp.h"
 
 
 /* Shorthands for the GCC function attributes used in this file. */
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #define NO_INLINE __attribute__((noinline))
 #define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
 
 /* Attribute sets for the functions below; every variant includes
  * no_instrument_function. */
 #define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
 #define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
 #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
 
 /* Requests 64-byte alignment for the constant tables. */
 #define ALIGNED_64 __attribute__ ((aligned (64)))
 
 
 /* Views of one 128-bit vector register: 16 bytes, four 32-bit words or
  * two 64-bit doublewords. */
 typedef vector unsigned char vector16x_u8;
 typedef vector unsigned int vector4x_u32;
 typedef vector unsigned long long vector2x_u64;
 
 
 /* Constants shared by the reflected (crc32r_*) and the crc32_* routines in
  * this file.  Both members are loaded into vector registers and fed to
  * asm_vpmsumd(). */
 struct crc32_consts_s
 {
   /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
   unsigned long long k[6];
   /* my_p: { floor(x^64 / P(x)), P(x) } */
   unsigned long long my_p[2];
 };
 
 /* Carry-less multiplication constants for CRC32 and CRC32RFC1510; this is
  * the table _gcry_crc32_ppc8_vpmsum() passes to its helpers. */
 static const struct crc32_consts_s crc32_consts ALIGNED_64 =
 {
   { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
     U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
     U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
     U64_C(0x163cd6124), 0                   /* y = 2 */
   },
   { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
     U64_C(0x1f7011641), U64_C(0x1db710641)
   }
 };
 
 /* Carry-less multiplication constants for CRC24RFC2440 (polynomial
  * multiplied with x⁸); this is the table _gcry_crc24rfc2440_ppc8_vpmsum()
  * passes to its helpers. */
 static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 =
 {
   { /* k[6] = x^(32*y) mod P(x) << 32*/
     U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
     U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
     U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
   },
   { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
     U64_C(0x1f845fe24), U64_C(0x1864cfb00)
   }
 };
 
 
 /* Emit one `vpmsumd` instruction with A and B as source operands and
  * return the contents of its destination register. */
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vpmsumd(vector2x_u64 a, vector2x_u64 b)
 {
   vector2x_u64 product;
 
   __asm__("vpmsumd %0, %1, %2"
 	  : "=v" (product)
 	  : "v" (a), "v" (b));
 
   return product;
 }
 
 
 /* Emit one `xxswapd` instruction on A (exchange of the two 64-bit halves)
  * and return the result. */
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_swap_u64(vector2x_u64 a)
 {
   vector2x_u64 swapped;
 
   __asm__("xxswapd %x0, %x1"
 	  : "=wa" (swapped)
 	  : "wa" (a));
 
   return swapped;
 }
 
 
 /* vec_sld() with the shift given in 32-bit words instead of bytes: IDX is
  * multiplied by four and masked to the 0..15 byte range.  Every call in
  * this file passes a literal IDX. */
 static ASM_FUNC_ATTR_INLINE vector4x_u32
 vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx)
 {
   return vec_sld (a, b, (4 * idx) & 15);
 }
 
 
 /* Sliding byte mask for the partial-fold step: 16 zero bytes followed by
  * 16 0xff bytes.  The bulk routines load 16 bytes starting at offset
  * `inlen` (1..15 at that point), which gives 16 - inlen zero bytes followed
  * by inlen 0xff bytes in memory order. */
 static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 =
   {
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   };
 /* Sliding vec_perm control table used by crc32_ppc8_ce_bulk(): the indexes
  * 0x0f down to 0x00 padded on both sides with 16 copies of 0x1f.  A 16-byte
  * window is loaded at a byte offset derived from the remaining length.
  * Index 0x1f refers to the second vec_perm operand, which that caller
  * passes as an all-zero vector. */
 static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 =
   {
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
     0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   };
 /* Sliding vec_perm control table used by both bulk routines: the indexes
  * 0x00 up to 0x0f padded on both sides with 16 copies of 0x1f.  A 16-byte
  * window is loaded at a byte offset derived from the remaining length.
  * Index 0x1f refers to the second vec_perm operand, which the callers pass
  * as an all-zero vector. */
 static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 =
   {
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
     0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   };
 /* vec_perm control vector listing the byte indexes 15 down to 0, i.e. a
  * full 16-byte reversal; used by CRC_VEC_SWAP(). */
 static const vector16x_u8 bswap_const ALIGNED_64 =
   { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
 
 
 /* Reverse the 16 bytes of vector V by permuting it with bswap_const.
  * V is evaluated once (GNU statement expression). */
 #define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \
                            vec_perm(__vecu64, __vecu64, bswap_const); })
 
 #ifdef WORDS_BIGENDIAN
 /* Big-endian build.
  *   CRC_VEC_U64_DEF      initializer with `hi` as element 0, `lo` as element 1.
  *   CRC_VEC_U64_LOAD     raw 16-byte load followed by a doubleword swap.
  *   CRC_VEC_U64_LOAD_LE  raw load followed by a full byte reversal.
  *   CRC_VEC_U64_LOAD_BE  raw load, no fix-up.
  *   CRC_VEC_SWAP_TO_LE / _TO_BE  byte reversal / identity.
  *   VEC_U64_LO / VEC_U64_HI      element indexes matching CRC_VEC_U64_DEF. */
 # define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 # define CRC_VEC_U64_LOAD(offs, ptr) \
 	  asm_swap_u64(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) \
 	  CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) \
 	  asm_vec_u64_load(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 # define CRC_VEC_SWAP_TO_BE(v) (v)
 # define VEC_U64_LO 1
 # define VEC_U64_HI 0
 
 /* Load 16 bytes from PTR + OFFSET with a single `lxvd2x` and return the
  * register contents unmodified (big-endian build).
  *
  * A compile-time OFFSET of zero uses the literal-0 index form, so no
  * register is spent on the offset. */
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load(unsigned long offset, const void *ptr)
 {
   vector2x_u64 vecu64;
 #if __GNUC__ >= 4
   if (__builtin_constant_p (offset) && offset == 0)
     __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
 		      : "=wa" (vecu64)
 		      : "r" ((uintptr_t)ptr)
 		      : "memory");
   else
 #endif
     /* r0 is listed as clobbered; presumably this keeps `offset` out of r0,
      * which the instruction's first index field would read as zero. */
     __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
 		      : "=wa" (vecu64)
 		      : "r" (offset), "r" ((uintptr_t)ptr)
 		      : "memory", "r0");
   return vecu64;
 }
 #else
 /* Little-endian build.
  *   CRC_VEC_U64_DEF      initializer with `lo` as element 0, `hi` as element 1.
  *   CRC_VEC_U64_LOAD and CRC_VEC_U64_LOAD_LE both map to
  *                        asm_vec_u64_load_le().
  *   CRC_VEC_U64_LOAD_BE  maps to asm_vec_u64_load_be().
  *   CRC_VEC_SWAP_TO_LE / _TO_BE  identity / byte reversal.
  *   VEC_U64_LO / VEC_U64_HI      element indexes matching CRC_VEC_U64_DEF. */
 # define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
 # define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) (v)
 # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 # define VEC_U64_LO 0
 # define VEC_U64_HI 1
 
 /* Load 16 bytes from PTR + OFFSET with `lxvd2x`, then swap the two
  * doublewords of the result via asm_swap_u64() (little-endian build).
  *
  * A compile-time OFFSET of zero uses the literal-0 index form, so no
  * register is spent on the offset. */
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_le(unsigned long offset, const void *ptr)
 {
   vector2x_u64 vecu64;
 #if __GNUC__ >= 4
   if (__builtin_constant_p (offset) && offset == 0)
     __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
 		      : "=wa" (vecu64)
 		      : "r" ((uintptr_t)ptr)
 		      : "memory");
   else
 #endif
     /* r0 is listed as clobbered; presumably this keeps `offset` out of r0,
      * which the instruction's first index field would read as zero. */
     __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
 		      : "=wa" (vecu64)
 		      : "r" (offset), "r" ((uintptr_t)ptr)
 		      : "memory", "r0");
   return asm_swap_u64(vecu64);
 }
 
 /* Load 16 bytes from PTR + OFFSET for the CRC_VEC_U64_LOAD_BE() macro of
  * the little-endian build.
  *
  * `lxvd2x` writes VSX register 32, which the following `vperm` reads as
  * AltiVec register v0 (hence the "v0" clobber).  The permutation uses
  * vec_load_le_const with v0 as both sources, so only the low bits of each
  * control byte matter.
  *
  * OFFSET is `unsigned long`, like the sibling loaders, because it is used
  * as a full-width index register.  The `else` is kept inside the
  * conditional block so the function stays well-formed whichever way the
  * preprocessor test goes. */
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_be(unsigned long offset, const void *ptr)
 {
   static const vector16x_u8 vec_load_le_const =
     { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
   vector2x_u64 vecu64;
 
 #if __GNUC__ >= 4
   if (__builtin_constant_p (offset) && offset == 0)
     __asm__ ("lxvd2x %%vs32,0,%1\n\t"
 	     "vperm %0,%%v0,%%v0,%2\n\t"
 	     : "=v" (vecu64)
 	     : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)
 	     : "memory", "v0");
   else
 #endif
     /* r0 is listed as clobbered; presumably this keeps `offset` out of r0,
      * which the instruction's first index field would read as zero. */
     __asm__ ("lxvd2x %%vs32,%1,%2\n\t"
 	     "vperm %0,%%v0,%%v0,%3\n\t"
 	     : "=v" (vecu64)
 	     : "r" (offset), "r" ((uintptr_t)(ptr)),
 	       "v" (vec_load_le_const)
 	     : "memory", "r0", "v0");
 
   return vecu64;
 }
 #endif
 
 
 /* Bulk update of the reflected CRC in *PCRC over INLEN bytes at INBUF using
  * the constants in CONSTS.
  *
  * The first 16-byte block is loaded unconditionally, so INLEN must be at
  * least 16; the public entry point only calls this for such lengths.
  *
  * Stages:
  *   1. with 128 bytes or more, four 16-byte lanes are folded in parallel
  *      with k[0..1] and then merged into one with k[2..3];
  *   2. remaining whole 16-byte blocks are folded one at a time;
  *   3. a trailing 1..15 bytes are folded via an overlapping 16-byte load;
  *   4. the 128-bit remainder is reduced to 32 bits and stored to *PCRC. */
 static ASM_FUNC_ATTR_INLINE void
 crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 		     const struct crc32_consts_s *consts)
 {
   vector4x_u32 zero = { 0, 0, 0, 0 };
   vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0);
   vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0);
   vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
   vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]);
   vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]);
   vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0);
   vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]);
   vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0);
   vector2x_u64 crc0, crc1, crc2, crc3;
   vector2x_u64 v0;
 
   if (inlen >= 8 * 16)
     {
       /* Prime the four lanes; the incoming CRC is mixed into lane 0. */
       crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
       crc0 ^= crc;
       crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
       crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
       crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
 
       inbuf += 4 * 16;
       inlen -= 4 * 16;
 
       /* Fold by 4. */
       while (inlen >= 4 * 16)
 	{
 	  v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
 	  crc0 = asm_vpmsumd(crc0, k1k2) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
 	  crc1 = asm_vpmsumd(crc1, k1k2) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
 	  crc2 = asm_vpmsumd(crc2, k1k2) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
 	  crc3 = asm_vpmsumd(crc3, k1k2) ^ v0;
 
 	  inbuf += 4 * 16;
 	  inlen -= 4 * 16;
 	}
 
       /* Fold 4 to 1. */
       crc1 ^= asm_vpmsumd(crc0, k3k4);
       crc2 ^= asm_vpmsumd(crc1, k3k4);
       crc3 ^= asm_vpmsumd(crc2, k3k4);
       crc = crc3;
     }
   else
     {
       /* Short input: start with the first block XORed into the CRC. */
       v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
       crc ^= v0;
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Fold by 1. */
   while (inlen >= 16)
     {
       v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
       crc = asm_vpmsumd(k3k4, crc);
       crc ^= v0;
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Partial fold. */
   if (inlen)
     {
       /* Load last input and add padding zeros. */
       vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
       vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, crc32_refl_shuf_shift);
       vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_refl_shuf_shift);
 
       /* Overlapping load: the 16 bytes that end at the end of the input. */
       v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
       v0 &= mask;
 
       crc = CRC_VEC_SWAP_TO_LE(crc);
       v0 |= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 				   (vector16x_u8)shr_shuf);
       crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 				   (vector16x_u8)shl_shuf);
       crc = asm_vpmsumd(k3k4, crc);
       crc ^= v0;
 
       inbuf += inlen;
       inlen -= inlen;
     }
 
   /* Final fold. */
 
   /* reduce 128-bits to 96-bits */
   v0 = asm_swap_u64(crc);
   v0 &= low_64bit_mask;
   crc = asm_vpmsumd(k4lo, crc);
   crc ^= v0;
 
   /* reduce 96-bits to 64-bits */
   v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 				 (vector4x_u32)crc, 3);  /* [x0][x3][x2][x1] */
   v0 &= low_64bit_mask;                                  /* [00][00][x2][x1] */
   crc = crc & low_32bit_mask;                            /* [00][00][00][x0] */
   crc = v0 ^ asm_vpmsumd(k5lo, crc);                     /* [00][00][xx][xx] */
 
   /* barrett reduction */
   v0 = crc << 32;                                        /* [00][00][x0][00] */
   v0 = asm_vpmsumd(my_p, v0);
   v0 = asm_swap_u64(v0);
   v0 = asm_vpmsumd(my_p, v0);
   crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 				  zero, 1);              /* [00][x1][x0][00] */
   crc ^= v0;
 
   /* The result is the low 32 bits of the HI element. */
   *pcrc = (u32)crc[VEC_U64_HI];
 }
 
 
 /* Reduce the 32-bit value DATA with two carry-less multiplications by the
  * my_p constants of CONSTS and return the upper half of the LO result
  * element XORed with CRC.  Used by crc32r_less_than_16() for each
  * four-byte (or shorter, pre-shifted) chunk. */
 static ASM_FUNC_ATTR_INLINE u32
 crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc,
 			    const struct crc32_consts_s *consts)
 {
   vector4x_u32 zero = { 0, 0, 0, 0 };
   vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
   vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0);
   v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
   v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
 				 zero, 3);             /* [x0][00][00][00] */
   v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
 				 (vector4x_u32)v0, 3); /* [00][x0][00][00] */
   v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
   return (v0[VEC_U64_LO] >> 32) ^ crc;
 }
 
 
 /* Update the reflected CRC in *PCRC over a short input.
  *
  * Whole 32-bit little-endian words are consumed first, one reduction per
  * word.  A tail of one to three bytes is assembled into a little-endian
  * value, XORed with the CRC, shifted up so that it occupies the top of the
  * 32-bit word, and reduced together with the CRC bits that were not
  * consumed by the tail. */
 static ASM_FUNC_ATTR_INLINE void
 crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 		     const struct crc32_consts_s *consts)
 {
   u32 crc = *pcrc;
   u32 data;
 
   for (; inlen >= 4; inlen -= 4, inbuf += 4)
     {
       data = buf_get_le32(inbuf) ^ crc;
       crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts);
     }
 
   if (inlen != 0)
     {
       /* 8, 16 or 24 bits of input are left. */
       unsigned int tail_bits = 8 * (unsigned int)inlen;
       unsigned int i;
 
       data = 0;
       for (i = 0; i < inlen; i++)
 	data |= (u32)inbuf[i] << (8 * i);
 
       data ^= crc;
       data <<= 32 - tail_bits;
       crc >>= tail_bits;
       crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
     }
 
   *pcrc = crc;
 }
 
 
 /* Bulk CRC update over INLEN bytes at INBUF using big-endian block loads;
  * this is the routine _gcry_crc24rfc2440_ppc8_vpmsum() uses.  *PCRC is
  * byte-swapped on the way in and on the way out.
  *
  * The first 16-byte block is loaded unconditionally, so INLEN must be at
  * least 16; the public entry point only calls this for such lengths.
  *
  * Stages:
  *   1. with 128 bytes or more, four 16-byte lanes are folded in parallel
  *      with k[0..1] and then merged into one with k[2..3];
  *   2. remaining whole 16-byte blocks are folded one at a time;
  *   3. a trailing 1..15 bytes are folded via an overlapping 16-byte load;
  *   4. the 128-bit remainder is reduced to 32 bits and stored to *PCRC. */
 static ASM_FUNC_ATTR_INLINE void
 crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
 		    const struct crc32_consts_s *consts)
 {
   vector4x_u32 zero = { 0, 0, 0, 0 };
   vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32));
   vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0]));
   vector2x_u64 p_my_lo, p_my_hi;
   vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]));
   vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]));
   vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]);
   vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]);
   vector2x_u64 crc = CRC_VEC_U64_DEF(0, _gcry_bswap64(*pcrc));
   vector2x_u64 crc0, crc1, crc2, crc3;
   vector2x_u64 v0;
 
   if (inlen >= 8 * 16)
     {
       /* Prime the four lanes; the incoming CRC is mixed into lane 0. */
       crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
       crc0 ^= crc;
       crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
       crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
       crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
 
       inbuf += 4 * 16;
       inlen -= 4 * 16;
 
       /* Fold by 4. */
       while (inlen >= 4 * 16)
 	{
 	  v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
 	  crc0 = asm_vpmsumd(crc0, k2k1) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
 	  crc1 = asm_vpmsumd(crc1, k2k1) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
 	  crc2 = asm_vpmsumd(crc2, k2k1) ^ v0;
 
 	  v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
 	  crc3 = asm_vpmsumd(crc3, k2k1) ^ v0;
 
 	  inbuf += 4 * 16;
 	  inlen -= 4 * 16;
 	}
 
       /* Fold 4 to 1. */
       crc1 ^= asm_vpmsumd(crc0, k4k3);
       crc2 ^= asm_vpmsumd(crc1, k4k3);
       crc3 ^= asm_vpmsumd(crc2, k4k3);
       crc = crc3;
     }
   else
     {
       /* Short input: start with the first block XORed into the CRC. */
       v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
       crc ^= v0;
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Fold by 1. */
   while (inlen >= 16)
     {
       v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
       crc = asm_vpmsumd(k4k3, crc);
       crc ^= v0;
 
       inbuf += 16;
       inlen -= 16;
     }
 
   /* Partial fold. */
   if (inlen)
     {
       /* Load last input and add padding zeros. */
       vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask);
       vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, crc32_refl_shuf_shift);
       vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_shuf_shift);
 
       /* Overlapping load: the 16 bytes that end at the end of the input. */
       v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
       v0 &= mask;
 
       crc = CRC_VEC_SWAP_TO_LE(crc);
       crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 				    (vector16x_u8)shr_shuf);
       v0 |= crc2;
       v0 = CRC_VEC_SWAP(v0);
       crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
 				   (vector16x_u8)shl_shuf);
       crc = asm_vpmsumd(k4k3, crc);
       crc ^= v0;
 
       inbuf += inlen;
       inlen -= inlen;
     }
 
   /* Final fold. */
 
   /* reduce 128-bits to 96-bits */
   v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 				 (vector4x_u32)zero, 2);
   crc = asm_vpmsumd(k4hi, crc);
   crc ^= v0; /* bottom 32-bit are zero */
 
   /* reduce 96-bits to 64-bits */
   v0 = crc & low_96bit_mask;    /* [00][x2][x1][00] */
   crc >>= 32;                   /* [00][x3][00][x0] */
   crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */
   crc ^= v0;                    /* top and bottom 32-bit are zero */
 
   /* barrett reduction */
   p_my_hi = p_my;
   p_my_lo = p_my;
   p_my_hi[VEC_U64_LO] = 0;
   p_my_lo[VEC_U64_HI] = 0;
   v0 = crc >> 32;                                        /* [00][00][00][x1] */
   crc = asm_vpmsumd(p_my_hi, crc);                       /* [00][xx][xx][xx] */
   crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
 				  (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */
   crc = asm_vpmsumd(p_my_lo, crc);                       /* [00][xx][xx][xx] */
   crc ^= v0;
 
   /* Store the LO element's low 32 bits, byte-swapped back. */
   *pcrc = _gcry_bswap32(crc[VEC_U64_LO]);
 }
 
 
 /* Reduce the 32-bit value DATA with two carry-less multiplications by the
  * my_p constants of CONSTS, clearing the LO element in between, and return
  * the byte-swapped low 32 bits of the LO result element XORed with CRC.
  * Used by crc32_less_than_16(). */
 static ASM_FUNC_ATTR_INLINE u32
 crc32_ppc8_ce_reduction_4 (u32 data, u32 crc,
 			   const struct crc32_consts_s *consts)
 {
   vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
   vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0);
   v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */
   v0[VEC_U64_LO] = 0;         /* [00][x1][00][00] */
   v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
   return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc;
 }
 
 
 /* Update the CRC in *PCRC (kept in input byte order) over a short input.
  *
  * Whole 32-bit words are read little-endian, XORed with the CRC and
  * byte-swapped before each reduction.  A tail of one to three bytes is
  * assembled little-endian, XORed with the CRC, shifted to the top of the
  * word and byte-swapped, then reduced together with the CRC bits that were
  * not consumed by the tail. */
 static ASM_FUNC_ATTR_INLINE void
 crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
 		    const struct crc32_consts_s *consts)
 {
   u32 crc = *pcrc;
   u32 data;
 
   for (; inlen >= 4; inlen -= 4, inbuf += 4)
     {
       data = _gcry_bswap32(buf_get_le32(inbuf) ^ crc);
       crc = crc32_ppc8_ce_reduction_4 (data, 0, consts);
     }
 
   if (inlen != 0)
     {
       /* 8, 16 or 24 bits of input are left. */
       unsigned int tail_bits = 8 * (unsigned int)inlen;
       unsigned int i;
 
       data = 0;
       for (i = 0; i < inlen; i++)
 	data |= (u32)inbuf[i] << (8 * i);
 
       data ^= crc;
       /* For a single byte this is the same as masking with 0xff. */
       data = _gcry_bswap32(data << (32 - tail_bits));
       crc >>= tail_bits;
       crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
     }
 
   *pcrc = crc;
 }
 
 /* Entry point for the reflected CRC32 using the crc32_consts table:
  * inputs of 16 bytes or more take the vector bulk path, shorter ones the
  * word-at-a-time path.  An empty input leaves *PCRC untouched. */
 void ASM_FUNC_ATTR
 _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   if (inlen == 0)
     return;
 
   if (inlen < 16)
     crc32r_less_than_16 (pcrc, inbuf, inlen, &crc32_consts);
   else
     crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, &crc32_consts);
 }
 
 /* Entry point for CRC24RFC2440 using the crc24rfc2440_consts table:
  * inputs of 16 bytes or more take the vector bulk path, shorter ones the
  * word-at-a-time path.  An empty input leaves *PCRC untouched.
  *
  * Note: *pcrc in input endian. */
 void ASM_FUNC_ATTR
 _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
 {
   if (inlen == 0)
     return;
 
   if (inlen < 16)
     crc32_less_than_16 (pcrc, inbuf, inlen, &crc24rfc2440_consts);
   else
     crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, &crc24rfc2440_consts);
 }
 
 #endif
diff --git a/cipher/crc.c b/cipher/crc.c
index b38869ec..cdff0648 100644
--- a/cipher/crc.c
+++ b/cipher/crc.c
@@ -1,955 +1,955 @@
 /* crc.c - Cyclic redundancy checks.
  * Copyright (C) 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 
 
 /* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL/SSE4.1
  * code.  */
 #undef USE_INTEL_PCLMUL
 #if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT)
 # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
 #   define USE_INTEL_PCLMUL 1
 #  endif
 # endif
 #endif /* USE_INTEL_PCLMUL */
 
 /* USE_ARM_PMULL indicates whether to compile CRC with ARMv8 PMULL code. */
 #undef USE_ARM_PMULL
 #if defined(ENABLE_ARM_CRYPTO_SUPPORT)
 # if defined(__AARCH64EL__) && \
     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
 #  define USE_ARM_PMULL 1
 # endif
 #endif /* USE_ARM_PMULL */
 
 /* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
  * accelerated code. */
 #undef USE_PPC_VPMSUM
 #ifdef ENABLE_PPC_CRYPTO_SUPPORT
 # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
      defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
 #  if __GNUC__ >= 4
 #   define USE_PPC_VPMSUM 1
 #  endif
 # endif
 #endif /* USE_PPC_VPMSUM */
 
 
 /* Context structure for the CRC code in this file.  The use_* flags exist
  * only when the matching accelerated implementation is compiled in. */
 typedef struct
 {
   /* CRC value; presumably the running state handed to the accelerated
    * routines through their `pcrc` argument -- confirm against the users. */
   u32 CRC;
 #ifdef USE_INTEL_PCLMUL
   unsigned int use_pclmul:1;           /* Intel PCLMUL shall be used.  */
 #endif
 #ifdef USE_ARM_PMULL
   unsigned int use_pmull:1;            /* ARMv8 PMULL shall be used. */
 #endif
 #ifdef USE_PPC_VPMSUM
   unsigned int use_vpmsum:1;           /* POWER vpmsum shall be used. */
 #endif
   /* NOTE(review): four-byte buffer; its use is not visible in this part of
    * the file. */
   byte buf[4];
 }
 CRC_CONTEXT;
 
 
 #ifdef USE_INTEL_PCLMUL
 /*-- crc-intel-pclmul.c --*/
 void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen);
 void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,
 				      size_t inlen);
 #endif
 
 #ifdef USE_ARM_PMULL
 /*-- crc-armv8-ce.c --*/
 void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen);
 void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,
 					size_t inlen);
 #endif
 
 #ifdef USE_PPC_VPMSUM
 /*-- crc-ppc.c --*/
 void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);
 void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,
 				     size_t inlen);
 #endif
 
 
 /*
  * Code generated by universal_crc by Danjel McGougan
  *
  * CRC parameters used:
  *   bits:       32
  *   poly:       0x04c11db7
  *   init:       0xffffffff
  *   xor:        0xffffffff
  *   reverse:    true
  *   non-direct: false
  *
  * CRC of the string "123456789" is 0xcbf43926
  */
 
 static const u32 crc32_table[1024] = {
   0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
   0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
   0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
   0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
   0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
   0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
   0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
   0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
   0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
   0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
   0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
   0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
   0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
   0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
   0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
   0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
   0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
   0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
   0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
   0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
   0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
   0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
   0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
   0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
   0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
   0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
   0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
   0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
   0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
   0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
   0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
   0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
   0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
   0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
   0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
   0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
   0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
   0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
   0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
   0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
   0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
   0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
   0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
   0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
   0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
   0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
   0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
   0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
   0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
   0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
   0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
   0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
   0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
   0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
   0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
   0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
   0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
   0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
   0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
   0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
   0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
   0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
   0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
   0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
   0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
   0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
   0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
   0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
   0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
   0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
   0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
   0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
   0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
   0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
   0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
   0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
   0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
   0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
   0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
   0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
   0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
   0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
   0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
   0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
   0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
   0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
   0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
   0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
   0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
   0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
   0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
   0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
   0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
   0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
   0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
   0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
   0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
   0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
   0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
   0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
   0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
   0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
   0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
   0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
   0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
   0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
   0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
   0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
   0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
   0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
   0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
   0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
   0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
   0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
   0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
   0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
   0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
   0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
   0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
   0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
   0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
   0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
   0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
   0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
   0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
   0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
   0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
   0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
   0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
   0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
   0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
   0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
   0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
   0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
   0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
   0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
   0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
   0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
   0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
   0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
   0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
   0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
   0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
   0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
   0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
   0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
   0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
   0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
   0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
   0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
   0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
   0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
   0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
   0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
   0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
   0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
   0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
   0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
   0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
   0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
   0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
   0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
   0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
   0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
   0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
   0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
   0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
   0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
   0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
   0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
   0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
   0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
   0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
   0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
   0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
   0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
   0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
   0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
   0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
   0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
   0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
   0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
   0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
   0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
   0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
   0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
   0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
   0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
   0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
   0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
   0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
   0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
   0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
   0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
   0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
   0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
   0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
   0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
   0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
   0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
   0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
   0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
   0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
   0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
   0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
   0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
   0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
   0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
   0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
   0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
   0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
   0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
   0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
   0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
   0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
   0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
   0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
   0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
   0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
   0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
   0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
   0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
   0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
   0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
   0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
   0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
   0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
   0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
   0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
   0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
   0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
   0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
   0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
   0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
   0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
   0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
   0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
   0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
   0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
   0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
   0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
   0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
   0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
   0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
   0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
   0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
   0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
   0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
   0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
   0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
   0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
   0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
   0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
   0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
   0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
   0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1
 };
 
 /* CRC32 */
 
 /*
  * Advance the running CRC by one input byte: the low byte of CRC is
  * XORed with DATA to pick a crc32_table entry, which is then folded
  * into the remaining upper bits of CRC.
  */
 static inline u32
 crc32_next (u32 crc, byte data)
 {
   const u32 slot = (crc & 0xff) ^ data;
 
   return crc32_table[slot] ^ (crc >> 8);
 }
 
 /*
  * Fold one 32-bit word (four input bytes) into the CRC in one go.
  * Each byte of CRC ^ DATA indexes its own section of crc32_table
  * (offsets 0x300, 0x200, 0x100 and 0, from the lowest byte to the
  * highest) and the four looked-up words are XORed together.
  */
 static inline u32
 crc32_next4 (u32 crc, u32 data)
 {
   const u32 mixed = crc ^ data;
   const u32 byte0 = mixed & 0xff;
   const u32 byte1 = (mixed >> 8) & 0xff;
   const u32 byte2 = (mixed >> 16) & 0xff;
   const u32 byte3 = (mixed >> 24) & 0xff;
 
   return crc32_table[byte0 + 0x300]
          ^ crc32_table[byte1 + 0x200]
          ^ crc32_table[byte2 + 0x100]
          ^ crc32_table[byte3];
 }
 
 /*
  * Reset a CRC32 context.  CONTEXT is used as a CRC_CONTEXT and FLAGS
  * is ignored.  For every accelerated backend that was compiled in,
  * the matching use_* switch is derived from the hardware feature
  * bits; the running CRC is then preset to all ones.
  */
 static void
 crc32_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *const crc_ctx = context;
   const u32 hw_features = _gcry_get_hw_features ();
 
   /* Neither value is needed when no accelerated backend is built.  */
   (void)flags;
   (void)hw_features;
 
 #ifdef USE_INTEL_PCLMUL
   crc_ctx->use_pclmul = (hw_features & HWF_INTEL_SSE4_1)
                         && (hw_features & HWF_INTEL_PCLMUL);
 #endif
 #ifdef USE_ARM_PMULL
   crc_ctx->use_pmull = (hw_features & HWF_ARM_NEON)
                        && (hw_features & HWF_ARM_PMULL);
 #endif
 #ifdef USE_PPC_VPMSUM
   crc_ctx->use_vpmsum = !!(hw_features & HWF_PPC_ARCH_2_07);
 #endif
 
   crc_ctx->CRC = 0xffffffffL;
 }
 
 /*
  * Feed INLEN bytes starting at INBUF_ARG into the CRC32 state held in
  * CONTEXT.  If an accelerated backend was enabled for this context the
  * whole buffer is handed to it.  Otherwise the generic code reads the
  * input through buf_get_le32 in 16-byte strides, then word by word,
  * and finishes the remaining bytes one at a time.  On the generic
  * path a NULL or empty buffer leaves the state untouched.
  */
 static void
 crc32_write (void *context, const void *inbuf_arg, size_t inlen)
 {
   CRC_CONTEXT *const crc_ctx = context;
   const byte *src = inbuf_arg;
   u32 acc;
 
 #ifdef USE_INTEL_PCLMUL
   if (crc_ctx->use_pclmul)
     {
       _gcry_crc32_intel_pclmul (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 #ifdef USE_ARM_PMULL
   if (crc_ctx->use_pmull)
     {
       _gcry_crc32_armv8_ce_pmull (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 #ifdef USE_PPC_VPMSUM
   if (crc_ctx->use_vpmsum)
     {
       _gcry_crc32_ppc8_vpmsum (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 
   if (!(src && inlen))
     return;
 
   acc = crc_ctx->CRC;
 
   /* Unrolled bulk loop: four words per iteration.  */
   for (; inlen >= 16; inlen -= 16, src += 16)
     {
       acc = crc32_next4 (acc, buf_get_le32 (&src[0]));
       acc = crc32_next4 (acc, buf_get_le32 (&src[4]));
       acc = crc32_next4 (acc, buf_get_le32 (&src[8]));
       acc = crc32_next4 (acc, buf_get_le32 (&src[12]));
     }
 
   /* Remaining whole words.  */
   for (; inlen >= 4; inlen -= 4, src += 4)
     acc = crc32_next4 (acc, buf_get_le32 (src));
 
   /* Trailing bytes.  */
   for (; inlen; inlen--)
     acc = crc32_next (acc, *src++);
 
   crc_ctx->CRC = acc;
 }
 
 /* Hand back the result buffer stored inside CONTEXT (a CRC_CONTEXT).  */
 static byte *
 crc32_read (void *context)
 {
   return ((CRC_CONTEXT *) context)->buf;
 }
 
 /*
  * Finish a CRC32 computation: invert the running CRC in place and
  * store it into the context's result buffer with buf_put_be32.
  */
 static void
 crc32_final (void *context)
 {
   CRC_CONTEXT *const crc_ctx = context;
 
   crc_ctx->CRC ^= 0xffffffffL;
   buf_put_be32 (crc_ctx->buf, crc_ctx->CRC);
 }
 
 /* CRC32 a'la RFC 1510 */
 /* CRC of the string "123456789" is 0x2dfd2d88 */
 
 /*
  * Reset a context for the RFC 1510 flavour of CRC32.  Same backend
  * selection as the plain CRC32 initializer, but the running CRC
  * starts at zero.  FLAGS is ignored.
  */
 static void
 crc32rfc1510_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *const crc_ctx = context;
   const u32 hw_features = _gcry_get_hw_features ();
 
   /* Neither value is needed when no accelerated backend is built.  */
   (void)flags;
   (void)hw_features;
 
 #ifdef USE_INTEL_PCLMUL
   crc_ctx->use_pclmul = (hw_features & HWF_INTEL_SSE4_1)
                         && (hw_features & HWF_INTEL_PCLMUL);
 #endif
 #ifdef USE_ARM_PMULL
   crc_ctx->use_pmull = (hw_features & HWF_ARM_NEON)
                        && (hw_features & HWF_ARM_PMULL);
 #endif
 #ifdef USE_PPC_VPMSUM
   crc_ctx->use_vpmsum = !!(hw_features & HWF_PPC_ARCH_2_07);
 #endif
 
   crc_ctx->CRC = 0;
 }
 
 /*
  * Finish an RFC 1510 CRC32: the running CRC is stored unmodified into
  * the context's result buffer with buf_put_be32.
  */
 static void
 crc32rfc1510_final (void *context)
 {
   CRC_CONTEXT *const crc_ctx = context;
 
   buf_put_be32 (crc_ctx->buf, crc_ctx->CRC);
 }
 
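 /* CRC24 a'la RFC 2440 */
 /*
  * Code generated by universal_crc by Danjel McGougan
  *
  * CRC parameters used:
  *   bits:       24
  *   poly:       0x864cfb
  *   init:       0xb704ce
  *   xor:        0x000000
  *   reverse:    false
  *   non-direct: false
  *
  * CRC of the string "123456789" is 0x21cf02
  *
  * The 1024-entry table below is used as four consecutive 256-entry
  * sections: crc24_next() indexes it with a single byte-sized value,
  * while crc24_next4() adds the offsets 0x100, 0x200 and 0x300 to
  * reach the other three sections.
  */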
 
 static const u32 crc24_table[1024] =
 {
   0x00000000, 0x00fb4c86, 0x000dd58a, 0x00f6990c,
   0x00e1e693, 0x001aaa15, 0x00ec3319, 0x00177f9f,
   0x003981a1, 0x00c2cd27, 0x0034542b, 0x00cf18ad,
   0x00d86732, 0x00232bb4, 0x00d5b2b8, 0x002efe3e,
   0x00894ec5, 0x00720243, 0x00849b4f, 0x007fd7c9,
   0x0068a856, 0x0093e4d0, 0x00657ddc, 0x009e315a,
   0x00b0cf64, 0x004b83e2, 0x00bd1aee, 0x00465668,
   0x005129f7, 0x00aa6571, 0x005cfc7d, 0x00a7b0fb,
   0x00e9d10c, 0x00129d8a, 0x00e40486, 0x001f4800,
   0x0008379f, 0x00f37b19, 0x0005e215, 0x00feae93,
   0x00d050ad, 0x002b1c2b, 0x00dd8527, 0x0026c9a1,
   0x0031b63e, 0x00cafab8, 0x003c63b4, 0x00c72f32,
   0x00609fc9, 0x009bd34f, 0x006d4a43, 0x009606c5,
   0x0081795a, 0x007a35dc, 0x008cacd0, 0x0077e056,
   0x00591e68, 0x00a252ee, 0x0054cbe2, 0x00af8764,
   0x00b8f8fb, 0x0043b47d, 0x00b52d71, 0x004e61f7,
   0x00d2a319, 0x0029ef9f, 0x00df7693, 0x00243a15,
   0x0033458a, 0x00c8090c, 0x003e9000, 0x00c5dc86,
   0x00eb22b8, 0x00106e3e, 0x00e6f732, 0x001dbbb4,
   0x000ac42b, 0x00f188ad, 0x000711a1, 0x00fc5d27,
   0x005beddc, 0x00a0a15a, 0x00563856, 0x00ad74d0,
   0x00ba0b4f, 0x004147c9, 0x00b7dec5, 0x004c9243,
   0x00626c7d, 0x009920fb, 0x006fb9f7, 0x0094f571,
   0x00838aee, 0x0078c668, 0x008e5f64, 0x007513e2,
   0x003b7215, 0x00c03e93, 0x0036a79f, 0x00cdeb19,
   0x00da9486, 0x0021d800, 0x00d7410c, 0x002c0d8a,
   0x0002f3b4, 0x00f9bf32, 0x000f263e, 0x00f46ab8,
   0x00e31527, 0x001859a1, 0x00eec0ad, 0x00158c2b,
   0x00b23cd0, 0x00497056, 0x00bfe95a, 0x0044a5dc,
   0x0053da43, 0x00a896c5, 0x005e0fc9, 0x00a5434f,
   0x008bbd71, 0x0070f1f7, 0x008668fb, 0x007d247d,
   0x006a5be2, 0x00911764, 0x00678e68, 0x009cc2ee,
   0x00a44733, 0x005f0bb5, 0x00a992b9, 0x0052de3f,
   0x0045a1a0, 0x00beed26, 0x0048742a, 0x00b338ac,
   0x009dc692, 0x00668a14, 0x00901318, 0x006b5f9e,
   0x007c2001, 0x00876c87, 0x0071f58b, 0x008ab90d,
   0x002d09f6, 0x00d64570, 0x0020dc7c, 0x00db90fa,
   0x00ccef65, 0x0037a3e3, 0x00c13aef, 0x003a7669,
   0x00148857, 0x00efc4d1, 0x00195ddd, 0x00e2115b,
   0x00f56ec4, 0x000e2242, 0x00f8bb4e, 0x0003f7c8,
   0x004d963f, 0x00b6dab9, 0x004043b5, 0x00bb0f33,
   0x00ac70ac, 0x00573c2a, 0x00a1a526, 0x005ae9a0,
   0x0074179e, 0x008f5b18, 0x0079c214, 0x00828e92,
   0x0095f10d, 0x006ebd8b, 0x00982487, 0x00636801,
   0x00c4d8fa, 0x003f947c, 0x00c90d70, 0x003241f6,
   0x00253e69, 0x00de72ef, 0x0028ebe3, 0x00d3a765,
   0x00fd595b, 0x000615dd, 0x00f08cd1, 0x000bc057,
   0x001cbfc8, 0x00e7f34e, 0x00116a42, 0x00ea26c4,
   0x0076e42a, 0x008da8ac, 0x007b31a0, 0x00807d26,
   0x009702b9, 0x006c4e3f, 0x009ad733, 0x00619bb5,
   0x004f658b, 0x00b4290d, 0x0042b001, 0x00b9fc87,
   0x00ae8318, 0x0055cf9e, 0x00a35692, 0x00581a14,
   0x00ffaaef, 0x0004e669, 0x00f27f65, 0x000933e3,
   0x001e4c7c, 0x00e500fa, 0x001399f6, 0x00e8d570,
   0x00c62b4e, 0x003d67c8, 0x00cbfec4, 0x0030b242,
   0x0027cddd, 0x00dc815b, 0x002a1857, 0x00d154d1,
   0x009f3526, 0x006479a0, 0x0092e0ac, 0x0069ac2a,
   0x007ed3b5, 0x00859f33, 0x0073063f, 0x00884ab9,
   0x00a6b487, 0x005df801, 0x00ab610d, 0x00502d8b,
   0x00475214, 0x00bc1e92, 0x004a879e, 0x00b1cb18,
   0x00167be3, 0x00ed3765, 0x001bae69, 0x00e0e2ef,
   0x00f79d70, 0x000cd1f6, 0x00fa48fa, 0x0001047c,
   0x002ffa42, 0x00d4b6c4, 0x00222fc8, 0x00d9634e,
   0x00ce1cd1, 0x00355057, 0x00c3c95b, 0x003885dd,
   0x00000000, 0x00488f66, 0x00901ecd, 0x00d891ab,
   0x00db711c, 0x0093fe7a, 0x004b6fd1, 0x0003e0b7,
   0x00b6e338, 0x00fe6c5e, 0x0026fdf5, 0x006e7293,
   0x006d9224, 0x00251d42, 0x00fd8ce9, 0x00b5038f,
   0x006cc771, 0x00244817, 0x00fcd9bc, 0x00b456da,
   0x00b7b66d, 0x00ff390b, 0x0027a8a0, 0x006f27c6,
   0x00da2449, 0x0092ab2f, 0x004a3a84, 0x0002b5e2,
   0x00015555, 0x0049da33, 0x00914b98, 0x00d9c4fe,
   0x00d88ee3, 0x00900185, 0x0048902e, 0x00001f48,
   0x0003ffff, 0x004b7099, 0x0093e132, 0x00db6e54,
   0x006e6ddb, 0x0026e2bd, 0x00fe7316, 0x00b6fc70,
   0x00b51cc7, 0x00fd93a1, 0x0025020a, 0x006d8d6c,
   0x00b44992, 0x00fcc6f4, 0x0024575f, 0x006cd839,
   0x006f388e, 0x0027b7e8, 0x00ff2643, 0x00b7a925,
   0x0002aaaa, 0x004a25cc, 0x0092b467, 0x00da3b01,
   0x00d9dbb6, 0x009154d0, 0x0049c57b, 0x00014a1d,
   0x004b5141, 0x0003de27, 0x00db4f8c, 0x0093c0ea,
   0x0090205d, 0x00d8af3b, 0x00003e90, 0x0048b1f6,
   0x00fdb279, 0x00b53d1f, 0x006dacb4, 0x002523d2,
   0x0026c365, 0x006e4c03, 0x00b6dda8, 0x00fe52ce,
   0x00279630, 0x006f1956, 0x00b788fd, 0x00ff079b,
   0x00fce72c, 0x00b4684a, 0x006cf9e1, 0x00247687,
   0x00917508, 0x00d9fa6e, 0x00016bc5, 0x0049e4a3,
   0x004a0414, 0x00028b72, 0x00da1ad9, 0x009295bf,
   0x0093dfa2, 0x00db50c4, 0x0003c16f, 0x004b4e09,
   0x0048aebe, 0x000021d8, 0x00d8b073, 0x00903f15,
   0x00253c9a, 0x006db3fc, 0x00b52257, 0x00fdad31,
   0x00fe4d86, 0x00b6c2e0, 0x006e534b, 0x0026dc2d,
   0x00ff18d3, 0x00b797b5, 0x006f061e, 0x00278978,
   0x002469cf, 0x006ce6a9, 0x00b47702, 0x00fcf864,
   0x0049fbeb, 0x0001748d, 0x00d9e526, 0x00916a40,
   0x00928af7, 0x00da0591, 0x0002943a, 0x004a1b5c,
   0x0096a282, 0x00de2de4, 0x0006bc4f, 0x004e3329,
   0x004dd39e, 0x00055cf8, 0x00ddcd53, 0x00954235,
   0x002041ba, 0x0068cedc, 0x00b05f77, 0x00f8d011,
   0x00fb30a6, 0x00b3bfc0, 0x006b2e6b, 0x0023a10d,
   0x00fa65f3, 0x00b2ea95, 0x006a7b3e, 0x0022f458,
   0x002114ef, 0x00699b89, 0x00b10a22, 0x00f98544,
   0x004c86cb, 0x000409ad, 0x00dc9806, 0x00941760,
   0x0097f7d7, 0x00df78b1, 0x0007e91a, 0x004f667c,
   0x004e2c61, 0x0006a307, 0x00de32ac, 0x0096bdca,
   0x00955d7d, 0x00ddd21b, 0x000543b0, 0x004dccd6,
   0x00f8cf59, 0x00b0403f, 0x0068d194, 0x00205ef2,
   0x0023be45, 0x006b3123, 0x00b3a088, 0x00fb2fee,
   0x0022eb10, 0x006a6476, 0x00b2f5dd, 0x00fa7abb,
   0x00f99a0c, 0x00b1156a, 0x006984c1, 0x00210ba7,
   0x00940828, 0x00dc874e, 0x000416e5, 0x004c9983,
   0x004f7934, 0x0007f652, 0x00df67f9, 0x0097e89f,
   0x00ddf3c3, 0x00957ca5, 0x004ded0e, 0x00056268,
   0x000682df, 0x004e0db9, 0x00969c12, 0x00de1374,
   0x006b10fb, 0x00239f9d, 0x00fb0e36, 0x00b38150,
   0x00b061e7, 0x00f8ee81, 0x00207f2a, 0x0068f04c,
   0x00b134b2, 0x00f9bbd4, 0x00212a7f, 0x0069a519,
   0x006a45ae, 0x0022cac8, 0x00fa5b63, 0x00b2d405,
   0x0007d78a, 0x004f58ec, 0x0097c947, 0x00df4621,
   0x00dca696, 0x009429f0, 0x004cb85b, 0x0004373d,
   0x00057d20, 0x004df246, 0x009563ed, 0x00ddec8b,
   0x00de0c3c, 0x0096835a, 0x004e12f1, 0x00069d97,
   0x00b39e18, 0x00fb117e, 0x002380d5, 0x006b0fb3,
   0x0068ef04, 0x00206062, 0x00f8f1c9, 0x00b07eaf,
   0x0069ba51, 0x00213537, 0x00f9a49c, 0x00b12bfa,
   0x00b2cb4d, 0x00fa442b, 0x0022d580, 0x006a5ae6,
   0x00df5969, 0x0097d60f, 0x004f47a4, 0x0007c8c2,
   0x00042875, 0x004ca713, 0x009436b8, 0x00dcb9de,
   0x00000000, 0x00d70983, 0x00555f80, 0x00825603,
   0x0051f286, 0x0086fb05, 0x0004ad06, 0x00d3a485,
   0x0059a88b, 0x008ea108, 0x000cf70b, 0x00dbfe88,
   0x00085a0d, 0x00df538e, 0x005d058d, 0x008a0c0e,
   0x00491c91, 0x009e1512, 0x001c4311, 0x00cb4a92,
   0x0018ee17, 0x00cfe794, 0x004db197, 0x009ab814,
   0x0010b41a, 0x00c7bd99, 0x0045eb9a, 0x0092e219,
   0x0041469c, 0x00964f1f, 0x0014191c, 0x00c3109f,
   0x006974a4, 0x00be7d27, 0x003c2b24, 0x00eb22a7,
   0x00388622, 0x00ef8fa1, 0x006dd9a2, 0x00bad021,
   0x0030dc2f, 0x00e7d5ac, 0x006583af, 0x00b28a2c,
   0x00612ea9, 0x00b6272a, 0x00347129, 0x00e378aa,
   0x00206835, 0x00f761b6, 0x007537b5, 0x00a23e36,
   0x00719ab3, 0x00a69330, 0x0024c533, 0x00f3ccb0,
   0x0079c0be, 0x00aec93d, 0x002c9f3e, 0x00fb96bd,
   0x00283238, 0x00ff3bbb, 0x007d6db8, 0x00aa643b,
   0x0029a4ce, 0x00fead4d, 0x007cfb4e, 0x00abf2cd,
   0x00785648, 0x00af5fcb, 0x002d09c8, 0x00fa004b,
   0x00700c45, 0x00a705c6, 0x002553c5, 0x00f25a46,
   0x0021fec3, 0x00f6f740, 0x0074a143, 0x00a3a8c0,
   0x0060b85f, 0x00b7b1dc, 0x0035e7df, 0x00e2ee5c,
   0x00314ad9, 0x00e6435a, 0x00641559, 0x00b31cda,
   0x003910d4, 0x00ee1957, 0x006c4f54, 0x00bb46d7,
   0x0068e252, 0x00bfebd1, 0x003dbdd2, 0x00eab451,
   0x0040d06a, 0x0097d9e9, 0x00158fea, 0x00c28669,
   0x001122ec, 0x00c62b6f, 0x00447d6c, 0x009374ef,
   0x001978e1, 0x00ce7162, 0x004c2761, 0x009b2ee2,
   0x00488a67, 0x009f83e4, 0x001dd5e7, 0x00cadc64,
   0x0009ccfb, 0x00dec578, 0x005c937b, 0x008b9af8,
   0x00583e7d, 0x008f37fe, 0x000d61fd, 0x00da687e,
   0x00506470, 0x00876df3, 0x00053bf0, 0x00d23273,
   0x000196f6, 0x00d69f75, 0x0054c976, 0x0083c0f5,
   0x00a9041b, 0x007e0d98, 0x00fc5b9b, 0x002b5218,
   0x00f8f69d, 0x002fff1e, 0x00ada91d, 0x007aa09e,
   0x00f0ac90, 0x0027a513, 0x00a5f310, 0x0072fa93,
   0x00a15e16, 0x00765795, 0x00f40196, 0x00230815,
   0x00e0188a, 0x00371109, 0x00b5470a, 0x00624e89,
   0x00b1ea0c, 0x0066e38f, 0x00e4b58c, 0x0033bc0f,
   0x00b9b001, 0x006eb982, 0x00ecef81, 0x003be602,
   0x00e84287, 0x003f4b04, 0x00bd1d07, 0x006a1484,
   0x00c070bf, 0x0017793c, 0x00952f3f, 0x004226bc,
   0x00918239, 0x00468bba, 0x00c4ddb9, 0x0013d43a,
   0x0099d834, 0x004ed1b7, 0x00cc87b4, 0x001b8e37,
   0x00c82ab2, 0x001f2331, 0x009d7532, 0x004a7cb1,
   0x00896c2e, 0x005e65ad, 0x00dc33ae, 0x000b3a2d,
   0x00d89ea8, 0x000f972b, 0x008dc128, 0x005ac8ab,
   0x00d0c4a5, 0x0007cd26, 0x00859b25, 0x005292a6,
   0x00813623, 0x00563fa0, 0x00d469a3, 0x00036020,
   0x0080a0d5, 0x0057a956, 0x00d5ff55, 0x0002f6d6,
   0x00d15253, 0x00065bd0, 0x00840dd3, 0x00530450,
   0x00d9085e, 0x000e01dd, 0x008c57de, 0x005b5e5d,
   0x0088fad8, 0x005ff35b, 0x00dda558, 0x000aacdb,
   0x00c9bc44, 0x001eb5c7, 0x009ce3c4, 0x004bea47,
   0x00984ec2, 0x004f4741, 0x00cd1142, 0x001a18c1,
   0x009014cf, 0x00471d4c, 0x00c54b4f, 0x001242cc,
   0x00c1e649, 0x0016efca, 0x0094b9c9, 0x0043b04a,
   0x00e9d471, 0x003eddf2, 0x00bc8bf1, 0x006b8272,
   0x00b826f7, 0x006f2f74, 0x00ed7977, 0x003a70f4,
   0x00b07cfa, 0x00677579, 0x00e5237a, 0x00322af9,
   0x00e18e7c, 0x003687ff, 0x00b4d1fc, 0x0063d87f,
   0x00a0c8e0, 0x0077c163, 0x00f59760, 0x00229ee3,
   0x00f13a66, 0x002633e5, 0x00a465e6, 0x00736c65,
   0x00f9606b, 0x002e69e8, 0x00ac3feb, 0x007b3668,
   0x00a892ed, 0x007f9b6e, 0x00fdcd6d, 0x002ac4ee,
   0x00000000, 0x00520936, 0x00a4126c, 0x00f61b5a,
   0x004825d8, 0x001a2cee, 0x00ec37b4, 0x00be3e82,
   0x006b0636, 0x00390f00, 0x00cf145a, 0x009d1d6c,
   0x002323ee, 0x00712ad8, 0x00873182, 0x00d538b4,
   0x00d60c6c, 0x0084055a, 0x00721e00, 0x00201736,
   0x009e29b4, 0x00cc2082, 0x003a3bd8, 0x006832ee,
   0x00bd0a5a, 0x00ef036c, 0x00191836, 0x004b1100,
   0x00f52f82, 0x00a726b4, 0x00513dee, 0x000334d8,
   0x00ac19d8, 0x00fe10ee, 0x00080bb4, 0x005a0282,
   0x00e43c00, 0x00b63536, 0x00402e6c, 0x0012275a,
   0x00c71fee, 0x009516d8, 0x00630d82, 0x003104b4,
   0x008f3a36, 0x00dd3300, 0x002b285a, 0x0079216c,
   0x007a15b4, 0x00281c82, 0x00de07d8, 0x008c0eee,
   0x0032306c, 0x0060395a, 0x00962200, 0x00c42b36,
   0x00111382, 0x00431ab4, 0x00b501ee, 0x00e708d8,
   0x0059365a, 0x000b3f6c, 0x00fd2436, 0x00af2d00,
   0x00a37f36, 0x00f17600, 0x00076d5a, 0x0055646c,
   0x00eb5aee, 0x00b953d8, 0x004f4882, 0x001d41b4,
   0x00c87900, 0x009a7036, 0x006c6b6c, 0x003e625a,
   0x00805cd8, 0x00d255ee, 0x00244eb4, 0x00764782,
   0x0075735a, 0x00277a6c, 0x00d16136, 0x00836800,
   0x003d5682, 0x006f5fb4, 0x009944ee, 0x00cb4dd8,
   0x001e756c, 0x004c7c5a, 0x00ba6700, 0x00e86e36,
   0x005650b4, 0x00045982, 0x00f242d8, 0x00a04bee,
   0x000f66ee, 0x005d6fd8, 0x00ab7482, 0x00f97db4,
   0x00474336, 0x00154a00, 0x00e3515a, 0x00b1586c,
   0x006460d8, 0x003669ee, 0x00c072b4, 0x00927b82,
   0x002c4500, 0x007e4c36, 0x0088576c, 0x00da5e5a,
   0x00d96a82, 0x008b63b4, 0x007d78ee, 0x002f71d8,
   0x00914f5a, 0x00c3466c, 0x00355d36, 0x00675400,
   0x00b26cb4, 0x00e06582, 0x00167ed8, 0x004477ee,
   0x00fa496c, 0x00a8405a, 0x005e5b00, 0x000c5236,
   0x0046ff6c, 0x0014f65a, 0x00e2ed00, 0x00b0e436,
   0x000edab4, 0x005cd382, 0x00aac8d8, 0x00f8c1ee,
   0x002df95a, 0x007ff06c, 0x0089eb36, 0x00dbe200,
   0x0065dc82, 0x0037d5b4, 0x00c1ceee, 0x0093c7d8,
   0x0090f300, 0x00c2fa36, 0x0034e16c, 0x0066e85a,
   0x00d8d6d8, 0x008adfee, 0x007cc4b4, 0x002ecd82,
   0x00fbf536, 0x00a9fc00, 0x005fe75a, 0x000dee6c,
   0x00b3d0ee, 0x00e1d9d8, 0x0017c282, 0x0045cbb4,
   0x00eae6b4, 0x00b8ef82, 0x004ef4d8, 0x001cfdee,
   0x00a2c36c, 0x00f0ca5a, 0x0006d100, 0x0054d836,
   0x0081e082, 0x00d3e9b4, 0x0025f2ee, 0x0077fbd8,
   0x00c9c55a, 0x009bcc6c, 0x006dd736, 0x003fde00,
   0x003cead8, 0x006ee3ee, 0x0098f8b4, 0x00caf182,
   0x0074cf00, 0x0026c636, 0x00d0dd6c, 0x0082d45a,
   0x0057ecee, 0x0005e5d8, 0x00f3fe82, 0x00a1f7b4,
   0x001fc936, 0x004dc000, 0x00bbdb5a, 0x00e9d26c,
   0x00e5805a, 0x00b7896c, 0x00419236, 0x00139b00,
   0x00ada582, 0x00ffacb4, 0x0009b7ee, 0x005bbed8,
   0x008e866c, 0x00dc8f5a, 0x002a9400, 0x00789d36,
   0x00c6a3b4, 0x0094aa82, 0x0062b1d8, 0x0030b8ee,
   0x00338c36, 0x00618500, 0x00979e5a, 0x00c5976c,
   0x007ba9ee, 0x0029a0d8, 0x00dfbb82, 0x008db2b4,
   0x00588a00, 0x000a8336, 0x00fc986c, 0x00ae915a,
   0x0010afd8, 0x0042a6ee, 0x00b4bdb4, 0x00e6b482,
   0x00499982, 0x001b90b4, 0x00ed8bee, 0x00bf82d8,
   0x0001bc5a, 0x0053b56c, 0x00a5ae36, 0x00f7a700,
   0x00229fb4, 0x00709682, 0x00868dd8, 0x00d484ee,
   0x006aba6c, 0x0038b35a, 0x00cea800, 0x009ca136,
   0x009f95ee, 0x00cd9cd8, 0x003b8782, 0x00698eb4,
   0x00d7b036, 0x0085b900, 0x0073a25a, 0x0021ab6c,
   0x00f493d8, 0x00a69aee, 0x005081b4, 0x00028882,
   0x00bcb600, 0x00eebf36, 0x0018a46c, 0x004aad5a
 };
 
 /* Return the start value of the running CRC24 state.  */
 static inline
 u32 crc24_init (void)
 {
   /* The 24-bit init value, transformed to a 32-bit CRC by multiplying
      by x⁸ and then byte swapping: _gcry_bswap(0xb704ce << 8).  */
   const u32 start_value = 0xce04b7;
 
   return start_value;
 }
 
 /*
  * Advance the running CRC24 state by one input byte: the low byte of
  * CRC is XORed with DATA to pick a crc24_table entry, which is then
  * folded into the remaining upper bits of CRC.
  */
 static inline
 u32 crc24_next (u32 crc, byte data)
 {
   const u32 slot = (crc & 0xff) ^ data;
 
   return crc24_table[slot] ^ (crc >> 8);
 }
 
 /*
  * Fold one 32-bit word (four input bytes) into the CRC24 state in one
  * go.  The three low bytes of CRC ^ DATA index the crc24_table
  * sections at offsets 0x300, 0x200 and 0x100; the section at offset 0
  * is indexed with the top byte of DATA alone.
  */
 static inline
 u32 crc24_next4 (u32 crc, u32 data)
 {
   const u32 mixed = crc ^ data;
   const u32 byte0 = mixed & 0xff;
   const u32 byte1 = (mixed >> 8) & 0xff;
   const u32 byte2 = (mixed >> 16) & 0xff;
   /* NOTE(review): taken from DATA, not from CRC ^ DATA -- presumably
      because the top byte of a CRC24 state is always zero; confirm.  */
   const u32 byte3 = (data >> 24) & 0xff;
 
   return crc24_table[byte0 + 0x300]
          ^ crc24_table[byte1 + 0x200]
          ^ crc24_table[byte2 + 0x100]
          ^ crc24_table[byte3];
 }
 
 /* Reduce the running state to its low 24 bits.  */
 static inline
 u32 crc24_final (u32 crc)
 {
   const u32 low24_mask = 0xffffff;
 
   return crc & low24_mask;
 }
 
 /*
  * Reset a CRC24 (RFC 2440) context.  CONTEXT is used as a CRC_CONTEXT
  * and FLAGS is ignored.  For every accelerated backend that was
  * compiled in, the matching use_* switch is derived from the hardware
  * feature bits; the running CRC is set to the value of crc24_init().
  */
 static void
 crc24rfc2440_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *const crc_ctx = context;
   const u32 hw_features = _gcry_get_hw_features ();
 
   /* Neither value is needed when no accelerated backend is built.  */
   (void)hw_features;
   (void)flags;
 
 #ifdef USE_INTEL_PCLMUL
   crc_ctx->use_pclmul = (hw_features & HWF_INTEL_SSE4_1)
                         && (hw_features & HWF_INTEL_PCLMUL);
 #endif
 #ifdef USE_ARM_PMULL
   crc_ctx->use_pmull = (hw_features & HWF_ARM_NEON)
                        && (hw_features & HWF_ARM_PMULL);
 #endif
 #ifdef USE_PPC_VPMSUM
   crc_ctx->use_vpmsum = !!(hw_features & HWF_PPC_ARCH_2_07);
 #endif
 
   crc_ctx->CRC = crc24_init ();
 }
 
 /*
  * Feed INLEN bytes starting at INBUF_ARG into the CRC24 state held in
  * CONTEXT.  If an accelerated backend was enabled for this context the
  * whole buffer is handed to it.  Otherwise the generic code reads the
  * input through buf_get_le32 in 16-byte strides, then word by word,
  * and finishes the remaining bytes one at a time.  On the generic
  * path a NULL or empty buffer leaves the state untouched.
  */
 static void
 crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
 {
   const unsigned char *src = inbuf_arg;
   CRC_CONTEXT *const crc_ctx = context;
   u32 acc;
 
 #ifdef USE_INTEL_PCLMUL
   if (crc_ctx->use_pclmul)
     {
       _gcry_crc24rfc2440_intel_pclmul (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 #ifdef USE_ARM_PMULL
   if (crc_ctx->use_pmull)
     {
       _gcry_crc24rfc2440_armv8_ce_pmull (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 #ifdef USE_PPC_VPMSUM
   if (crc_ctx->use_vpmsum)
     {
       _gcry_crc24rfc2440_ppc8_vpmsum (&crc_ctx->CRC, src, inlen);
       return;
     }
 #endif
 
   if (!(src && inlen))
     return;
 
   acc = crc_ctx->CRC;
 
   /* Unrolled bulk loop: four words per iteration.  */
   for (; inlen >= 16; inlen -= 16, src += 16)
     {
       acc = crc24_next4 (acc, buf_get_le32 (&src[0]));
       acc = crc24_next4 (acc, buf_get_le32 (&src[4]));
       acc = crc24_next4 (acc, buf_get_le32 (&src[8]));
       acc = crc24_next4 (acc, buf_get_le32 (&src[12]));
     }
 
   /* Remaining whole words.  */
   for (; inlen >= 4; inlen -= 4, src += 4)
     acc = crc24_next4 (acc, buf_get_le32 (src));
 
   /* Trailing bytes.  */
   for (; inlen; inlen--)
     acc = crc24_next (acc, *src++);
 
   crc_ctx->CRC = acc;
 }
 
 /*
  * Finish a CRC24 computation: reduce the running state with
  * crc24_final(), keep the reduced value in the context and store it
  * into the result buffer with buf_put_le32.
  */
 static void
 crc24rfc2440_final (void *context)
 {
   CRC_CONTEXT *const crc_ctx = context;
 
   crc_ctx->CRC = crc24_final (crc_ctx->CRC);
   buf_put_le32 (crc_ctx->buf, crc_ctx->CRC);
 }
 
 /* We allow the CRC algorithms even in FIPS mode because they are
    not actually cryptographic primitives.  */
 
 /* Digest descriptor for plain CRC32.  The positional initializer
    carries the GCRY_MD_CRC32 id, the name "CRC32", the crc32_init /
    crc32_write / crc32_final / crc32_read handlers and the size of
    CRC_CONTEXT.  NOTE(review): the 4 is presumably the digest length
    in bytes and {0, 1} a pair of flags -- confirm against the
    declaration of gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_crc32 =
   {
     GCRY_MD_CRC32, {0, 1},
     "CRC32", NULL, 0, NULL, 4,
     crc32_init, crc32_write, crc32_final, crc32_read, NULL,
     NULL,
     sizeof (CRC_CONTEXT)
   };
 
 /* Digest descriptor for the RFC 1510 flavour of CRC32.  It has its
    own init and final handlers (crc32rfc1510_init, crc32rfc1510_final)
    but shares crc32_write and crc32_read with plain CRC32.
    NOTE(review): the 4 is presumably the digest length in bytes --
    confirm against the declaration of gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 =
   {
     GCRY_MD_CRC32_RFC1510, {0, 1},
     "CRC32RFC1510", NULL, 0, NULL, 4,
     crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL,
     NULL,
     sizeof (CRC_CONTEXT)
   };
 
 /* Digest descriptor for CRC24 as in RFC 2440.  It uses the
    crc24rfc2440_init / _write / _final handlers and reuses crc32_read,
    which only returns the context's result buffer.  NOTE(review): the
    3 is presumably the digest length in bytes -- confirm against the
    declaration of gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 =
   {
     GCRY_MD_CRC24_RFC2440, {0, 1},
     "CRC24RFC2440", NULL, 0, NULL, 3,
     crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, NULL,
     NULL,
     sizeof (CRC_CONTEXT)
   };
diff --git a/cipher/des.c b/cipher/des.c
index 7a81697a..8c899c2c 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -1,1435 +1,1435 @@
 /* des.c - DES and Triple-DES encryption/decryption Algorithm
  * Copyright (C) 1998, 1999, 2001, 2002, 2003,
  *               2008  Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * For a description of triple encryption, see:
  *   Bruce Schneier: Applied Cryptography. Second Edition.
  *   John Wiley & Sons, 1996. ISBN 0-471-12845-7. Pages 358 ff.
  * This implementation is according to the definition of DES in FIPS
  * PUB 46-2 from December 1993.
  */
 
 
 /*
  * Written by Michael Roth <mroth@nessie.de>, September 1998
  */
 
 
 /*
  *  U S A G E
  * ===========
  *
  * For DES or Triple-DES encryption/decryption you must initialize a proper
  * encryption context with a key.
  *
  * A DES key is 64bit wide but only 56bits of the key are used. The remaining
  * bits are parity bits and they will _not_ be checked in this implementation,
  * but are simply ignored.
  *
  * For Triple-DES you could use either two 64bit keys or three 64bit keys.
  * The parity bits will _not_ be checked here either.
  *
  * After initializing a context with a key you could use this context to
  * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode.
  *
  * (In the examples below the slashes at the beginning and ending of comments
  * are omitted.)
  *
  * DES Example
  * -----------
  *     unsigned char key[8];
  *     unsigned char plaintext[8];
  *     unsigned char ciphertext[8];
  *     unsigned char recoverd[8];
  *     des_ctx context;
  *
  *     * Fill 'key' and 'plaintext' with some data *
  *     ....
  *
  *     * Set up the DES encryption context *
  *     des_setkey(context, key);
  *
  *     * Encrypt the plaintext *
  *     des_ecb_encrypt(context, plaintext, ciphertext);
  *
  *     * To recover the original plaintext from ciphertext use: *
  *     des_ecb_decrypt(context, ciphertext, recoverd);
  *
  *
  * Triple-DES Example
  * ------------------
  *     unsigned char key1[8];
  *     unsigned char key2[8];
  *     unsigned char key3[8];
  *     unsigned char plaintext[8];
  *     unsigned char ciphertext[8];
  *     unsigned char recoverd[8];
  *     tripledes_ctx context;
  *
  *     * If you would like to use two 64bit keys, fill 'key1' and 'key2'
  *	 then set up the encryption context: *
  *     tripledes_set2keys(context, key1, key2);
  *
  *     * To use three 64bit keys with Triple-DES use: *
  *     tripledes_set3keys(context, key1, key2, key3);
  *
  *     * Encrypting plaintext with Triple-DES *
  *     tripledes_ecb_encrypt(context, plaintext, ciphertext);
  *
  *     * Decrypting ciphertext to recover the plaintext with Triple-DES *
  *     tripledes_ecb_decrypt(context, ciphertext, recoverd);
  *
  *
  * Selftest
  * --------
  *     char *error_msg;
  *
  *     * To perform a selftest of this DES/Triple-DES implementation use the
  *	 function selftest(). It will return an error string if there are
  *	 some problems with this library. *
  *
  *     if ( (error_msg = selftest()) )
  *     {
  *	   fprintf(stderr, "An error in the DES/Triple-DES implementation occurred: %s\n", error_msg);
  *	   abort();
  *     }
  */
 
 
 #include <config.h>
 #include <stdio.h>
 #include <string.h>	       /* memcpy, memcmp */
 #include "types.h"             /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 
 
 #define DES_BLOCKSIZE 8
 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 /* Helper macro to force alignment to 16 bytes.  */
 #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
 # define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
 #else
 # define ATTR_ALIGNED_16
 #endif
 
 #if defined(__GNUC__) && defined(__GNU_LIBRARY__)
 # define working_memcmp memcmp
 #else
 /*
  * According to the SunOS man page, memcmp returns indeterminate sign
  * depending on whether characters are signed or not.  This stand-in
  * walks both buffers and, at the first position where they differ,
  * returns the difference of the two values read as `byte'; it
  * returns 0 when the first N characters are equal.
  */
 static int
 working_memcmp( const void *_a, const void *_b, size_t n )
 {
     const char *lhs = _a;
     const char *rhs = _b;
 
     while( n )
       {
         if( *lhs != *rhs )
           {
             const int left = (int)(*(byte*)lhs);
             const int right = (int)(*(byte*)rhs);
 
             return left - right;
           }
         n--;
         lhs++;
         rhs++;
       }
     return 0;
 }
 #endif
 
 /*
  * Encryption/Decryption context of DES
  *
  * Holds one array of 32 subkey words per direction.  The typedef is an
  * array of one struct, so a `des_ctx' variable can be passed as is to
  * the functions in this file that take a `struct _des_ctx *'.
  */
 typedef struct _des_ctx
   {
     u32 encrypt_subkeys[32];
     u32 decrypt_subkeys[32];
   }
 des_ctx[1];
 
 /*
  * Encryption/Decryption context of Triple-DES
  *
  * Holds one array of 96 subkey words per direction (three times the
  * size of the single-DES arrays) plus a flags sub-struct.  As with
  * des_ctx, the typedef is an array of one struct so that a variable
  * of this type can be passed where a `struct _tripledes_ctx *' is
  * expected.
  */
 typedef struct _tripledes_ctx
   {
     u32 encrypt_subkeys[96];
     u32 decrypt_subkeys[96];
     struct {
       /* NOTE(review): presumably disables the weak-key check when
          set -- confirm at the place where it is read.  */
       int no_weak_key;
     } flags;
   }
 tripledes_ctx[1];
 
 static void des_key_schedule (const byte *, u32 *);
 static int des_setkey (struct _des_ctx *, const byte *);
 static int des_ecb_crypt (struct _des_ctx *, const byte *, byte *, int);
 static int tripledes_set2keys (struct _tripledes_ctx *,
                                const byte *, const byte *);
 static int tripledes_set3keys (struct _tripledes_ctx *,
                                const byte *, const byte *, const byte *);
 static int tripledes_ecb_crypt (struct _tripledes_ctx *,
                                 const byte *, byte *, int);
 static int is_weak_key ( const byte *key );
 static const char *selftest (void);
 static unsigned int do_tripledes_encrypt(void *context, byte *outbuf,
 					 const byte *inbuf );
 static unsigned int do_tripledes_decrypt(void *context, byte *outbuf,
 					 const byte *inbuf );
 static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key,
                                            unsigned keylen,
                                            cipher_bulk_ops_t *bulk_ops);
 
 static int initialized;
 
 
 
 
 /*
  * The s-box values are permuted according to the 'primitive function P'
  * and are rotated one bit to the left.
  *
  * Each s-box is a table of 64 precomputed 32-bit words.
  * NOTE(review): presumably indexed by the 6-bit s-box input -- confirm
  * against the code that reads these tables.
  */
 static u32 sbox1[64] =
 {
   0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000,
   0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004,
   0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404,
   0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000,
   0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400,
   0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404,
   0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400,
   0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004
 };
 
 /* S-box 2: looked up in DES_ROUND with bits 24..29 of the first work
    word of a round.  */
 static u32 sbox2[64] =
 {
   0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020,
   0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020,
   0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000,
   0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020,
   0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000,
   0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000,
   0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020,
   0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000
 };
 
 /* S-box 3: looked up in DES_ROUND with bits 16..21 of the second work
    word of a round.  */
 static u32 sbox3[64] =
 {
   0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200,
   0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208,
   0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208,
   0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000,
   0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 0x08020200, 0x08000200, 0x00000000,
   0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008,
   0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008,
   0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200
 };
 
 /* S-box 4: looked up in DES_ROUND with bits 16..21 of the first work
    word of a round.  */
 static u32 sbox4[64] =
 {
   0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001,
   0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001,
   0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080,
   0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081,
   0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000,
   0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080,
   0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081,
   0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080
 };
 
 /* S-box 5: looked up in DES_ROUND with bits 8..13 of the second work
    word of a round.  */
 static u32 sbox5[64] =
 {
   0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000,
   0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000,
   0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100,
   0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100,
   0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100,
   0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000,
   0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000,
   0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100
 };
 
 /* S-box 6: looked up in DES_ROUND with bits 8..13 of the first work
    word of a round.  */
 static u32 sbox6[64] =
 {
   0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000,
   0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010,
   0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010,
   0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000,
   0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010,
   0x00400000, 0x20004000, 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000,
   0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010,
   0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010
 };
 
 /* S-box 7: looked up in DES_ROUND with bits 0..5 of the second work
    word of a round.  */
 static u32 sbox7[64] =
 {
   0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800,
   0x04200802, 0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802,
   0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002,
   0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800,
   0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002,
   0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 0x00000802, 0x00200802, 0x04200800,
   0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802,
   0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002
 };
 
 /* S-box 8: looked up in DES_ROUND with bits 0..5 of the first work
    word of a round.  */
 static u32 sbox8[64] =
 {
   0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, 0x00000040, 0x10000000,
   0x00040040, 0x10040000, 0x10041040, 0x00041000, 0x10041000, 0x00041040, 0x00001000, 0x00000040,
   0x10040000, 0x10000040, 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000,
   0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, 0x00041040, 0x00040000,
   0x00041040, 0x00040000, 0x10041000, 0x00001000, 0x00000040, 0x10040040, 0x00001000, 0x00041040,
   0x10001000, 0x00000040, 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040,
   0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, 0x10001040, 0x00000000,
   0x10041040, 0x00041000, 0x00041000, 0x00001040, 0x00001040, 0x00040040, 0x10000000, 0x10041000
 };
 
 
 /*
  * These two tables are part of the 'permuted choice 1' function.
  * In this implementation several speed improvements are done.
  *
  * leftkey_swap is indexed with 4 bit nibbles of the left key word in
  * des_key_schedule(); each entry spreads the four index bits over the
  * lowest bit of the four result bytes.
  */
 static u32 leftkey_swap[16] =
 {
   0x00000000, 0x00000001, 0x00000100, 0x00000101,
   0x00010000, 0x00010001, 0x00010100, 0x00010101,
   0x01000000, 0x01000001, 0x01000100, 0x01000101,
   0x01010000, 0x01010001, 0x01010100, 0x01010101
 };
 
 /* Counterpart of leftkey_swap for the right key word: indexed with
    4 bit nibbles in des_key_schedule(), with the byte order of the
    spread bits reversed compared to leftkey_swap.  */
 static u32 rightkey_swap[16] =
 {
   0x00000000, 0x01000000, 0x00010000, 0x01010000,
   0x00000100, 0x01000100, 0x00010100, 0x01010100,
   0x00000001, 0x01000001, 0x00010001, 0x01010001,
   0x00000101, 0x01000101, 0x00010101, 0x01010101,
 };
 
 
 
 /*
  * Numbers of left shifts per round for encryption subkeys.
  * To calculate the decryption subkeys we just reverse the
  * ordering of the calculated encryption subkeys. So there
  * is no need for a decryption rotate table.
  * The entries are applied to the two 28 bit key halves in
  * des_key_schedule(), one entry per round.
  */
 static byte encrypt_rotate_tab[16] =
 {
   1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
 };
 
 
 
 /*
  * Table with weak DES keys sorted in ascending order.
  * In DES there are 64 known keys which are weak. They are weak
  * because they produce only one, two or four different
  * subkeys in the subkey scheduling process.
  * The keys in this table have all their parity bits cleared.
  *
  * The ascending (byte-wise) order must be kept: is_weak_key() does a
  * binary search over this table.
  * NOTE(review): the trailing w and sw marks presumably stand for
  * "weak" and "semi-weak" -- confirm.
  */
 static byte weak_keys[64][8] =
 {
   { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /*w*/
   { 0x00, 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e },
   { 0x00, 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0 },
   { 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe },
   { 0x00, 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e }, /*sw*/
   { 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00 },
   { 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe },
   { 0x00, 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0 },
   { 0x00, 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0 }, /*sw*/
   { 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe },
   { 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00 },
   { 0x00, 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e },
   { 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe }, /*sw*/
   { 0x00, 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0 },
   { 0x00, 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e },
   { 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00 },
   { 0x1e, 0x00, 0x00, 0x1e, 0x0e, 0x00, 0x00, 0x0e },
   { 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e, 0x00 }, /*sw*/
   { 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0, 0xfe },
   { 0x1e, 0x00, 0xfe, 0xe0, 0x0e, 0x00, 0xfe, 0xf0 },
   { 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00, 0x00 },
   { 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x0e, 0x0e, 0x0e }, /*w*/
   { 0x1e, 0x1e, 0xe0, 0xe0, 0x0e, 0x0e, 0xf0, 0xf0 },
   { 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe, 0xfe },
   { 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00, 0xfe },
   { 0x1e, 0xe0, 0x1e, 0xe0, 0x0e, 0xf0, 0x0e, 0xf0 }, /*sw*/
   { 0x1e, 0xe0, 0xe0, 0x1e, 0x0e, 0xf0, 0xf0, 0x0e },
   { 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe, 0x00 },
   { 0x1e, 0xfe, 0x00, 0xe0, 0x0e, 0xfe, 0x00, 0xf0 },
   { 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e, 0xfe }, /*sw*/
   { 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0, 0x00 },
   { 0x1e, 0xfe, 0xfe, 0x1e, 0x0e, 0xfe, 0xfe, 0x0e },
   { 0xe0, 0x00, 0x00, 0xe0, 0xf0, 0x00, 0x00, 0xf0 },
   { 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e, 0xfe },
   { 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0, 0x00 }, /*sw*/
   { 0xe0, 0x00, 0xfe, 0x1e, 0xf0, 0x00, 0xfe, 0x0e },
   { 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00, 0xfe },
   { 0xe0, 0x1e, 0x1e, 0xe0, 0xf0, 0x0e, 0x0e, 0xf0 },
   { 0xe0, 0x1e, 0xe0, 0x1e, 0xf0, 0x0e, 0xf0, 0x0e }, /*sw*/
   { 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe, 0x00 },
   { 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00, 0x00 },
   { 0xe0, 0xe0, 0x1e, 0x1e, 0xf0, 0xf0, 0x0e, 0x0e },
   { 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0 }, /*w*/
   { 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe, 0xfe },
   { 0xe0, 0xfe, 0x00, 0x1e, 0xf0, 0xfe, 0x00, 0x0e },
   { 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e, 0x00 },
   { 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0, 0xfe }, /*sw*/
   { 0xe0, 0xfe, 0xfe, 0xe0, 0xf0, 0xfe, 0xfe, 0xf0 },
   { 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe },
   { 0xfe, 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0 },
   { 0xfe, 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e },
   { 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00 }, /*sw*/
   { 0xfe, 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0 },
   { 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe },
   { 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00 },
   { 0xfe, 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e }, /*sw*/
   { 0xfe, 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e },
   { 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00 },
   { 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe },
   { 0xfe, 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0 }, /*sw*/
   { 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00 },
   { 0xfe, 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e },
   { 0xfe, 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0 },
   { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }  /*w*/
 };
 /* NOTE(review): presumably a 20 byte digest of the weak_keys table that
    is verified during the self-test -- the use is not visible here,
    confirm before relying on it.  */
 static unsigned char weak_keys_chksum[20] = {
   0xD0, 0xCF, 0x07, 0x38, 0x93, 0x70, 0x8A, 0x83, 0x7D, 0xD7,
   0x8A, 0x36, 0x65, 0x29, 0x6C, 0x1F, 0x7C, 0x3F, 0xD3, 0x41
 };
 
 
 
 /*
  * Macro to swap bits across two words: the bits of A selected by
  * (MASK << OFFSET) are exchanged with the bits of B selected by MASK.
  *
  *    a, b:    modifiable lvalues holding the two words.
  *    temp:    modifiable lvalue used as scratch storage.
  *    offset:  shift distance between the exchanged bit positions.
  *    mask:    selects the bits of B that take part in the exchange.
  *
  * OFFSET and MASK are parenthesized so that expression arguments are
  * evaluated as a unit.  The macro intentionally expands to plain
  * statements, each ending in a semicolon, because it is invoked in
  * this file without a trailing semicolon.
  */
 #define DO_PERMUTATION(a, temp, b, offset, mask)	\
     temp = (((a) >> (offset)) ^ (b)) & (mask);		\
     b ^= temp;						\
     a ^= (temp) << (offset);
 
 
 /*
  * This performs the 'initial permutation' of the data to be encrypted
  * or decrypted. Additionally the resulting two words are rotated one bit
  * to the left.
  *
  * LEFT and RIGHT are modified in place and TEMP is overwritten as
  * scratch.  The rotations (<< 1 combined with >> 31) require 32 bit
  * wide unsigned operands.  Expands to a sequence of statements and is
  * used without a trailing semicolon.
  */
 #define INITIAL_PERMUTATION(left, temp, right)		\
     DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)	\
     DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
     DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
     DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
     right =  (right << 1) | (right >> 31);		\
     temp  =  (left ^ right) & 0xaaaaaaaa;		\
     right ^= temp;					\
     left  ^= temp;					\
     left  =  (left << 1) | (left >> 31);
 
 /*
  * The 'inverse initial permutation'.
  *
  * Applies the steps of INITIAL_PERMUTATION in reverse order, with the
  * one bit rotations going to the right (<< 31 combined with >> 1).
  * LEFT and RIGHT are modified in place and TEMP is overwritten as
  * scratch; 32 bit wide unsigned operands are required.
  */
 #define FINAL_PERMUTATION(left, temp, right)		\
     left  =  (left << 31) | (left >> 1);		\
     temp  =  (left ^ right) & 0xaaaaaaaa;		\
     left  ^= temp;					\
     right ^= temp;					\
     right  =  (right << 31) | (right >> 1);		\
     DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff)	\
     DO_PERMUTATION(right, temp, left, 2, 0x33333333)	\
     DO_PERMUTATION(left, temp, right, 16, 0x0000ffff)	\
     DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)
 
 
 /*
  * A full DES round including 'expansion function', 'sbox substitution'
  * and 'primitive function P' but without swapping the left and right word.
  * Please note: The data in 'from' and 'to' is already rotated one bit to
  * the left, done in the initial permutation.
  *
  *    from:    input word; read only.
  *    to:      word that gets the eight s-box lookups XORed in.
  *    work:    scratch variable, overwritten.
  *    subkey:  pointer lvalue to the round's two subkey words; it is
  *             advanced by two elements as a side effect.
  *
  * The first work word feeds the even numbered s-boxes, the second one
  * (FROM rotated right by 4 bits) feeds the odd numbered s-boxes.
  */
 #define DES_ROUND(from, to, work, subkey)		\
     work = from ^ *subkey++;				\
     to ^= sbox8[  work	    & 0x3f ];			\
     to ^= sbox6[ (work>>8)  & 0x3f ];			\
     to ^= sbox4[ (work>>16) & 0x3f ];			\
     to ^= sbox2[ (work>>24) & 0x3f ];			\
     work = ((from << 28) | (from >> 4)) ^ *subkey++;	\
     to ^= sbox7[  work	    & 0x3f ];			\
     to ^= sbox5[ (work>>8)  & 0x3f ];			\
     to ^= sbox3[ (work>>16) & 0x3f ];			\
     to ^= sbox1[ (work>>24) & 0x3f ];
 
 /*
  * Macros to convert 8 bytes from/to 32bit words.
  *
  * READ_64BIT_DATA loads LEFT from bytes 0..3 and RIGHT from bytes 4..7
  * of DATA via buf_get_be32(); WRITE_64BIT_DATA stores them back to the
  * same offsets via buf_put_be32().
  * NOTE(review): judging by the helper names the byte order is big
  * endian -- the helpers are defined elsewhere, confirm there.
  * Both expand to two statements and are used without a trailing
  * semicolon.
  */
 #define READ_64BIT_DATA(data, left, right)				   \
     left = buf_get_be32(data + 0);					   \
     right = buf_get_be32(data + 4);
 
 #define WRITE_64BIT_DATA(data, left, right)				   \
     buf_put_be32(data + 0, left);					   \
     buf_put_be32(data + 4, right);
 
 /*
  * Handy macros for encryption and decryption of data.
  * They forward to the *_ecb_crypt functions with a fixed 'mode'
  * argument: 0 selects the encryption subkeys, 1 the decryption subkeys.
  */
 #define des_ecb_encrypt(ctx, from, to)	      des_ecb_crypt(ctx, from, to, 0)
 #define des_ecb_decrypt(ctx, from, to)	      des_ecb_crypt(ctx, from, to, 1)
 #define tripledes_ecb_encrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,0)
 #define tripledes_ecb_decrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,1)
 
 
 
 
 
 
 /*
  * des_key_schedule():  Expand an 8 byte DES key into the 16 round
  *                      subkeys used for encryption.  Every round takes
  *                      two u32 words, so 32 words are written in total.
  *                      Decryption subkeys are not produced here; the
  *                      caller has to reorder the generated subkeys.
  *
  *    rawkey:     8 bytes of key data
  *    subkey:     Array of at least 32 u32s which receives the
  *                calculated subkeys.
  */
 static void
 des_key_schedule (const byte * rawkey, u32 * subkey)
 {
   u32 c, d, tmp;
   int r;
 
   READ_64BIT_DATA (rawkey, c, d)
 
   DO_PERMUTATION (d, tmp, c, 4, 0x0f0f0f0f)
   DO_PERMUTATION (d, tmp, c, 0, 0x10101010)
 
   /* Nibble-wise table lookups of 'permuted choice 1'; only the low
      28 bits of each half are kept.  */
   c = ((leftkey_swap[c & 0xf] << 3)
        | (leftkey_swap[(c >> 8) & 0xf] << 2)
        | (leftkey_swap[(c >> 16) & 0xf] << 1)
        | (leftkey_swap[(c >> 24) & 0xf])
        | (leftkey_swap[(c >> 5) & 0xf] << 7)
        | (leftkey_swap[(c >> 13) & 0xf] << 6)
        | (leftkey_swap[(c >> 21) & 0xf] << 5)
        | (leftkey_swap[(c >> 29) & 0xf] << 4)) & 0x0fffffff;
 
   d = ((rightkey_swap[(d >> 1) & 0xf] << 3)
        | (rightkey_swap[(d >> 9) & 0xf] << 2)
        | (rightkey_swap[(d >> 17) & 0xf] << 1)
        | (rightkey_swap[(d >> 25) & 0xf])
        | (rightkey_swap[(d >> 4) & 0xf] << 7)
        | (rightkey_swap[(d >> 12) & 0xf] << 6)
        | (rightkey_swap[(d >> 20) & 0xf] << 5)
        | (rightkey_swap[(d >> 28) & 0xf] << 4)) & 0x0fffffff;
 
   for (r = 0; r < 16; r++)
     {
       const int shift = encrypt_rotate_tab[r];
       u32 k;
 
       /* Rotate both 28 bit halves left by this round's shift count.  */
       c = ((c << shift) | (c >> (28 - shift))) & 0x0fffffff;
       d = ((d << shift) | (d >> (28 - shift))) & 0x0fffffff;
 
       /* First subkey word of the round.  */
       k  = ((c << 4) & 0x24000000)
          | ((c << 28) & 0x10000000)
          | ((c << 14) & 0x08000000)
          | ((c << 18) & 0x02080000)
          | ((c << 6) & 0x01000000)
          | ((c << 9) & 0x00200000)
          | ((c >> 1) & 0x00100000)
          | ((c << 10) & 0x00040000)
          | ((c << 2) & 0x00020000)
          | ((c >> 10) & 0x00010000);
       k |= ((d >> 13) & 0x00002000)
          | ((d >> 4) & 0x00001000)
          | ((d << 6) & 0x00000800)
          | ((d >> 1) & 0x00000400)
          | ((d >> 14) & 0x00000200)
          | (d & 0x00000100)
          | ((d >> 5) & 0x00000020)
          | ((d >> 10) & 0x00000010)
          | ((d >> 3) & 0x00000008)
          | ((d >> 18) & 0x00000004)
          | ((d >> 26) & 0x00000002)
          | ((d >> 24) & 0x00000001);
       subkey[2 * r] = k;
 
       /* Second subkey word of the round.  */
       k  = ((c << 15) & 0x20000000)
          | ((c << 17) & 0x10000000)
          | ((c << 10) & 0x08000000)
          | ((c << 22) & 0x04000000)
          | ((c >> 2) & 0x02000000)
          | ((c << 1) & 0x01000000)
          | ((c << 16) & 0x00200000)
          | ((c << 11) & 0x00100000)
          | ((c << 3) & 0x00080000)
          | ((c >> 6) & 0x00040000)
          | ((c << 15) & 0x00020000)
          | ((c >> 4) & 0x00010000);
       k |= ((d >> 2) & 0x00002000)
          | ((d << 8) & 0x00001000)
          | ((d >> 14) & 0x00000808)
          | ((d >> 9) & 0x00000400)
          | (d & 0x00000200)
          | ((d << 7) & 0x00000100)
          | ((d >> 7) & 0x00000020)
          | ((d >> 3) & 0x00000011)
          | ((d << 2) & 0x00000004)
          | ((d >> 21) & 0x00000002);
       subkey[2 * r + 1] = k;
     }
 }
 
 
 /*
  * Fill a DES context with subkeys calculated from a 64bit key.
  * Does not check parity bits, but simply ignore them.
  * Does not check for weak keys.
  */
 static int
 des_setkey (struct _des_ctx *ctx, const byte * key)
 {
   static const char *selftest_failed;
   int i;
 
   if (!fips_mode () && !initialized)
     {
       initialized = 1;
       selftest_failed = selftest ();
 
       if (selftest_failed)
 	log_error ("%s\n", selftest_failed);
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   des_key_schedule (key, ctx->encrypt_subkeys);
   _gcry_burn_stack (32);
 
   for(i=0; i<32; i+=2)
     {
       ctx->decrypt_subkeys[i]	= ctx->encrypt_subkeys[30-i];
       ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i];
     }
 
   return 0;
 }
 
 
 
 /*
  * Electronic Codebook Mode DES encryption/decryption of one 8 byte
  * block.  A zero 'mode' uses the encryption subkeys of CTX, any other
  * value the decryption subkeys.  Reads the block from FROM, writes the
  * result to TO and always returns 0.
  */
 static int
 des_ecb_crypt (struct _des_ctx *ctx, const byte * from, byte * to, int mode)
 {
   u32 l, r, tmp;
   u32 *rk;
 
   if (mode)
     rk = ctx->decrypt_subkeys;
   else
     rk = ctx->encrypt_subkeys;
 
   READ_64BIT_DATA (from, l, r)
   INITIAL_PERMUTATION (l, tmp, r)
 
   /* 16 rounds; each DES_ROUND consumes two subkey words from RK.  */
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
 
   /* The two halves are swapped for the final permutation and output.  */
   FINAL_PERMUTATION (r, tmp, l)
   WRITE_64BIT_DATA (to, r, l)
 
   return 0;
 }
 
 
 
 /*
  * Fill a Triple-DES context with subkeys calculated from two 64bit keys.
  * Does not check the parity bits of the keys, but simply ignore them.
  * Does not check for weak keys.
  */
 static int
 tripledes_set2keys (struct _tripledes_ctx *ctx,
 		    const byte * key1,
 		    const byte * key2)
 {
   int i;
 
   des_key_schedule (key1, ctx->encrypt_subkeys);
   des_key_schedule (key2, &(ctx->decrypt_subkeys[32]));
   _gcry_burn_stack (32);
 
   for(i=0; i<32; i+=2)
     {
       ctx->decrypt_subkeys[i]	 = ctx->encrypt_subkeys[30-i];
       ctx->decrypt_subkeys[i+1]  = ctx->encrypt_subkeys[31-i];
 
       ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i];
       ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i];
 
       ctx->encrypt_subkeys[i+64] = ctx->encrypt_subkeys[i];
       ctx->encrypt_subkeys[i+65] = ctx->encrypt_subkeys[i+1];
 
       ctx->decrypt_subkeys[i+64] = ctx->decrypt_subkeys[i];
       ctx->decrypt_subkeys[i+65] = ctx->decrypt_subkeys[i+1];
     }
 
   return 0;
 }
 
 
 
 /*
  * Fill a Triple-DES context with subkeys calculated from three 64bit keys.
  * Does not check the parity bits of the keys, but simply ignore them.
  * Does not check for weak keys.
  */
 static int
 tripledes_set3keys (struct _tripledes_ctx *ctx,
 		    const byte * key1,
 		    const byte * key2,
 		    const byte * key3)
 {
   static const char *selftest_failed;
   int i;
 
   if (!fips_mode () && !initialized)
     {
       initialized = 1;
       selftest_failed = selftest ();
 
       if (selftest_failed)
 	log_error ("%s\n", selftest_failed);
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   des_key_schedule (key1, ctx->encrypt_subkeys);
   des_key_schedule (key2, &(ctx->decrypt_subkeys[32]));
   des_key_schedule (key3, &(ctx->encrypt_subkeys[64]));
   _gcry_burn_stack (32);
 
   for(i=0; i<32; i+=2)
     {
       ctx->decrypt_subkeys[i]	 = ctx->encrypt_subkeys[94-i];
       ctx->decrypt_subkeys[i+1]  = ctx->encrypt_subkeys[95-i];
 
       ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i];
       ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i];
 
       ctx->decrypt_subkeys[i+64] = ctx->encrypt_subkeys[30-i];
       ctx->decrypt_subkeys[i+65] = ctx->encrypt_subkeys[31-i];
      }
 
   return 0;
 }
 
 
 
 #ifdef USE_AMD64_ASM
 
 /* Assembly implementation of triple-DES.  KEYS is the subkey array to
    use; the callers in this file pass either ctx->encrypt_subkeys or
    ctx->decrypt_subkeys.  Note the argument order: output before input. */
 extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out,
                                          const byte *in);
 
 /* These assembly implementations process three blocks in parallel. */
 extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out,
                                      const byte *in, byte *ctr);
 
 extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out,
                                      const byte *in, byte *iv);
 
 extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
                                      const byte *in, byte *iv);
 
 /* Initial stack burn depth used by the bulk functions when the
    single-block path is the assembly implementation. */
 #define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
 
 
 /*
  * Electronic Codebook Mode Triple-DES encryption/decryption of one
  * block according to 'mode' (zero: encryption subkeys, non-zero:
  * decryption subkeys).  Sometimes this mode is named 'EDE' mode
  * (Encryption-Decryption-Encryption).
  * This variant hands the block to the amd64 assembly routine and
  * always returns 0.
  */
 static inline int
 tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
                      byte * to, int mode)
 {
   u32 *schedule;
 
   if (mode)
     schedule = ctx->decrypt_subkeys;
   else
     schedule = ctx->encrypt_subkeys;
 
   /* The assembly routine takes the output buffer before the input.  */
   _gcry_3des_amd64_crypt_block(schedule, to, from);
 
   return 0;
 }
 
 /* Pass-through wrapper around the three-block CTR assembly routine;
    all arguments are forwarded unchanged.  */
 static inline void
 tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
 {
   _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
 }
 
 /* Pass-through wrapper around the three-block CBC decryption assembly
    routine; all arguments are forwarded unchanged.  */
 static inline void
 tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
 {
   _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
 }
 
 /* Pass-through wrapper around the three-block CFB decryption assembly
    routine; all arguments are forwarded unchanged.  */
 static inline void
 tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
 {
   _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
 }
 
 #else /*USE_AMD64_ASM*/
 
 #define TRIPLEDES_ECB_BURN_STACK 32
 
 /*
  * Electronic Codebook Mode Triple-DES encryption/decryption of one
  * block according to 'mode' (zero: encryption subkeys, non-zero:
  * decryption subkeys).  Sometimes this mode is named 'EDE' mode
  * (Encryption-Decryption-Encryption).
  * Generic C variant: three DES passes of 16 rounds each run back to
  * back over the 96 subkey words.  Always returns 0.
  */
 static int
 tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
                      byte * to, int mode)
 {
   u32 l, r, tmp;
   u32 *rk;
 
   if (mode)
     rk = ctx->decrypt_subkeys;
   else
     rk = ctx->encrypt_subkeys;
 
   READ_64BIT_DATA (from, l, r)
   INITIAL_PERMUTATION (l, tmp, r)
 
   /* First pass: subkey words 0..31.  */
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
 
   /* Second pass: subkey words 32..63, with the roles of the two
      halves exchanged.  */
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
   DES_ROUND (l, r, tmp, rk) DES_ROUND (r, l, tmp, rk)
 
   /* Third pass: subkey words 64..95.  */
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
   DES_ROUND (r, l, tmp, rk) DES_ROUND (l, r, tmp, rk)
 
   /* The two halves are swapped for the final permutation and output.  */
   FINAL_PERMUTATION (r, tmp, l)
   WRITE_64BIT_DATA (to, r, l)
 
   return 0;
 }
 
 #endif /*!USE_AMD64_ASM*/
 
 
 
 /* Bulk encryption of complete blocks in CTR mode.  This function is only
    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
    of size DES_BLOCKSIZE.  NBLOCKS blocks are read from INBUF_ARG and
    written to OUTBUF_ARG; CTR is incremented once per block by the
    generic loop.  The temporary block is wiped and the stack burned
    before returning. */
 static void
 _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
                    const void *inbuf_arg, size_t nblocks)
 {
   struct _tripledes_ctx *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char keystream[DES_BLOCKSIZE];
   int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
 
 #ifdef USE_AMD64_ASM
   {
     int asm_burn_depth = 9 * sizeof(void *);
 
     if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
       burn_stack_depth = asm_burn_depth;
 
     /* Hand chunks of three blocks to the assembly routine. */
     for (; nblocks >= 3; nblocks -= 3)
       {
         tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, dst, src, ctr);
 
         dst += 3 * DES_BLOCKSIZE;
         src += 3 * DES_BLOCKSIZE;
       }
 
     /* Whatever is left (fewer than three blocks) goes through the
        generic loop below. */
   }
 #endif
 
   while (nblocks > 0)
     {
       /* Keystream block = encrypted counter. */
       tripledes_ecb_encrypt (ctx, ctr, keystream);
       /* Output = input XOR keystream. */
       cipher_block_xor(dst, keystream, src, DES_BLOCKSIZE);
       dst += DES_BLOCKSIZE;
       src += DES_BLOCKSIZE;
       /* Advance the counter for the next block. */
       cipher_block_add(ctr, 1, DES_BLOCKSIZE);
       nblocks--;
     }
 
   wipememory(keystream, sizeof(keystream));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Bulk decryption of complete blocks in CBC mode.  This function is only
    intended for the bulk encryption feature of cipher.c.  NBLOCKS blocks
    are read from INBUF_ARG and written to OUTBUF_ARG, which may be the
    same buffer; IV is the chaining block.  The temporary block is wiped
    and the stack burned before returning. */
 static void
 _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
                    const void *inbuf_arg, size_t nblocks)
 {
   struct _tripledes_ctx *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char decrypted[DES_BLOCKSIZE];
   int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
 
 #ifdef USE_AMD64_ASM
   {
     int asm_burn_depth = 10 * sizeof(void *);
 
     if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
       burn_stack_depth = asm_burn_depth;
 
     /* Hand chunks of three blocks to the assembly routine. */
     for (; nblocks >= 3; nblocks -= 3)
       {
         tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, dst, src, iv);
 
         dst += 3 * DES_BLOCKSIZE;
         src += 3 * DES_BLOCKSIZE;
       }
 
     /* Whatever is left (fewer than three blocks) goes through the
        generic loop below. */
   }
 #endif
 
   while (nblocks > 0)
     {
       /* The input block is still needed afterwards and may alias the
          output, so decrypt into a temporary block first.  */
       tripledes_ecb_decrypt (ctx, src, decrypted);
 
       /* NOTE(review): presumably XORs the decrypted block with IV into
          the output and copies the input block to IV -- helper is
          defined elsewhere, confirm there.  */
       cipher_block_xor_n_copy_2(dst, decrypted, iv, src, DES_BLOCKSIZE);
       src += DES_BLOCKSIZE;
       dst += DES_BLOCKSIZE;
       nblocks--;
     }
 
   wipememory(decrypted, sizeof(decrypted));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Bulk decryption of complete blocks in CFB mode.  This function is only
    intended for the bulk encryption feature of cipher.c.  NBLOCKS blocks
    are read from INBUF_ARG and written to OUTBUF_ARG.  CFB decryption
    uses the block cipher in encryption direction, hence the encryption
    subkeys; IV is encrypted in place for every block.  The stack is
    burned before returning. */
 static void
 _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
                    const void *inbuf_arg, size_t nblocks)
 {
   struct _tripledes_ctx *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
 
 #ifdef USE_AMD64_ASM
   {
     int asm_burn_depth = 9 * sizeof(void *);
 
     if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
       burn_stack_depth = asm_burn_depth;
 
     /* Hand chunks of three blocks to the assembly routine. */
     for (; nblocks >= 3; nblocks -= 3)
       {
         tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, dst, src, iv);
 
         dst += 3 * DES_BLOCKSIZE;
         src += 3 * DES_BLOCKSIZE;
       }
 
     /* Whatever is left (fewer than three blocks) goes through the
        generic loop below. */
   }
 #endif
 
   while (nblocks > 0)
     {
       tripledes_ecb_encrypt (ctx, iv, iv);
       /* NOTE(review): presumably XORs IV with the input into the output
          and copies the input block to IV -- helper is defined
          elsewhere, confirm there.  */
       cipher_block_xor_n_copy(dst, iv, src, DES_BLOCKSIZE);
       dst += DES_BLOCKSIZE;
       src += DES_BLOCKSIZE;
       nblocks--;
     }
 
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /*
  * Check whether the 8 byte key is weak.
  * The parity bits of the key are not checked but simply ignored.
  * Returns -1 if the key is found in the weak_keys table and 0 otherwise.
  */
 static int
 is_weak_key ( const byte *key )
 {
   byte stripped[8];
   int idx, lo, hi;
 
   /* Clear the parity bit (lowest bit) of every key byte, as is done
      for the entries of the table.  */
   for (idx = 0; idx < 8; idx++)
     stripped[idx] = key[idx] & 0xfe;
 
   /* Binary search; relies on weak_keys being sorted ascending.  */
   lo = 0;
   hi = 63;
   while (lo <= hi)
     {
       const int mid = (lo + hi) / 2;
       const int order = working_memcmp (stripped, weak_keys[mid], 8);
 
       if (order == 0)
         return -1;
 
       if (order > 0)
         lo = mid + 1;
       else
         hi = mid - 1;
     }
 
   return 0;
 }
 
 
 /*
  * Performs a selftest of this DES/Triple-DES implementation.
  * Returns a string with the error text on failure.
  * Returns NULL if all is ok.
  */
 static const char *
 selftest (void)
 {
   /*
    * Check if 'u32' is really 32 bits wide. This DES / 3DES implementation
    * need this.
    */
   if (sizeof (u32) != 4)
     return "Wrong word size for DES configured.";
 
   /*
    * DES Maintenance Test
    */
   {
     int i;
     byte key[8] =
       {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
     byte input[8] =
       {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
     byte result[8] =
       {0x24, 0x6e, 0x9d, 0xb9, 0xc5, 0x50, 0x38, 0x1a};
     byte temp1[8], temp2[8], temp3[8];
     des_ctx des;
 
     for (i = 0; i < 64; ++i)
       {
 	des_setkey (des, key);
 	des_ecb_encrypt (des, input, temp1);
 	des_ecb_encrypt (des, temp1, temp2);
 	des_setkey (des, temp2);
 	des_ecb_decrypt (des, temp1, temp3);
 	memcpy (key, temp3, 8);
 	memcpy (input, temp1, 8);
       }
     if (memcmp (temp3, result, 8))
       return "DES maintenance test failed.";
   }
 
 
   /*
    * Self made Triple-DES test	(Does somebody know an official test?)
    */
   {
     int i;
     byte input[8] =
       {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10};
     byte key1[8] =
       {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0};
     byte key2[8] =
       {0x11, 0x22, 0x33, 0x44, 0xff, 0xaa, 0xcc, 0xdd};
     byte result[8] =
       {0x7b, 0x38, 0x3b, 0x23, 0xa2, 0x7d, 0x26, 0xd3};
 
     tripledes_ctx des3;
 
     for (i = 0; i < 16; ++i)
       {
 	tripledes_set2keys (des3, key1, key2);
 	tripledes_ecb_encrypt (des3, input, key1);
 	tripledes_ecb_decrypt (des3, input, key2);
 	tripledes_set3keys (des3, key1, input, key2);
 	tripledes_ecb_encrypt (des3, input, input);
       }
     if (memcmp (input, result, 8))
       return "Triple-DES test failed.";
   }
 
   /*
    * More Triple-DES test.  These are testvectors as used by SSLeay,
    * thanks to Jeroen C. van Gelderen.
    */
   {
     static const struct { byte key[24]; byte plain[8]; byte cipher[8]; }
       testdata[] = {
       { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01  },
         { 0x95,0xF8,0xA5,0xE5,0xDD,0x31,0xD9,0x00  },
         { 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00  }
       },
 
       { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01  },
         { 0x9D,0x64,0x55,0x5A,0x9A,0x10,0xB8,0x52, },
         { 0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00  }
       },
       { { 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
           0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
           0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E  },
         { 0x51,0x45,0x4B,0x58,0x2D,0xDF,0x44,0x0A  },
         { 0x71,0x78,0x87,0x6E,0x01,0xF1,0x9B,0x2A  }
       },
       { { 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
           0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
           0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6  },
         { 0x42,0xFD,0x44,0x30,0x59,0x57,0x7F,0xA2  },
         { 0xAF,0x37,0xFB,0x42,0x1F,0x8C,0x40,0x95  }
       },
       { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
           0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
           0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF  },
         { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61  },
         { 0x3D,0x12,0x4F,0xE2,0x19,0x8B,0xA3,0x18  }
       },
       { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
           0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
           0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF  },
         { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61  },
         { 0xFB,0xAB,0xA1,0xFF,0x9D,0x05,0xE9,0xB1  }
       },
       { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
           0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
           0xFE,0xDC,0xBA,0x98,0x76,0x54,0x32,0x10  },
         { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61  },
         { 0x18,0xd7,0x48,0xe5,0x63,0x62,0x05,0x72  }
       },
       { { 0x03,0x52,0x02,0x07,0x67,0x20,0x82,0x17,
           0x86,0x02,0x87,0x66,0x59,0x08,0x21,0x98,
           0x64,0x05,0x6A,0xBD,0xFE,0xA9,0x34,0x57  },
         { 0x73,0x71,0x75,0x69,0x67,0x67,0x6C,0x65  },
         { 0xc0,0x7d,0x2a,0x0f,0xa5,0x66,0xfa,0x30  }
       },
       { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x80,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02  },
         { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  },
         { 0xe6,0xe6,0xdd,0x5b,0x7e,0x72,0x29,0x74  }
       },
       { { 0x10,0x46,0x10,0x34,0x89,0x98,0x80,0x20,
           0x91,0x07,0xD0,0x15,0x89,0x19,0x01,0x01,
           0x19,0x07,0x92,0x10,0x98,0x1A,0x01,0x01  },
         { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  },
         { 0xe1,0xef,0x62,0xc3,0x32,0xfe,0x82,0x5b  }
       }
     };
 
     byte		result[8];
     int		i;
     tripledes_ctx	des3;
 
     for (i=0; i<sizeof(testdata)/sizeof(*testdata); ++i)
       {
         tripledes_set3keys (des3, testdata[i].key,
                             testdata[i].key + 8, testdata[i].key + 16);
 
         tripledes_ecb_encrypt (des3, testdata[i].plain, result);
         if (memcmp (testdata[i].cipher, result, 8))
           return "Triple-DES SSLeay test failed on encryption.";
 
         tripledes_ecb_decrypt (des3, testdata[i].cipher, result);
         if (memcmp (testdata[i].plain, result, 8))
           return  "Triple-DES SSLeay test failed on decryption.";;
       }
   }
 
   /*
    * Check the weak key detection. We simply assume that the table
    * with weak keys is ok and check every key in the table if it is
    * detected... (This test is a little bit stupid).
    */
   {
     int i;
     unsigned char *p;
     gcry_md_hd_t h;
 
     if (_gcry_md_open (&h, GCRY_MD_SHA1, 0))
       return "SHA1 not available";
 
     for (i = 0; i < 64; ++i)
       _gcry_md_write (h, weak_keys[i], 8);
     p = _gcry_md_read (h, GCRY_MD_SHA1);
     i = memcmp (p, weak_keys_chksum, 20);
     _gcry_md_close (h);
     if (i)
       return "weak key table defect";
 
     for (i = 0; i < 64; ++i)
       if (!is_weak_key(weak_keys[i]))
         return "DES weak key detection failed";
   }
 
   return 0;
 }
 
 
 /* Install the 24 byte Triple-DES key KEY into CONTEXT and fill BULK_OPS
    with the CBC/CFB decryption and CTR encryption bulk helpers.
    Returns GPG_ERR_INV_KEYLEN if KEYLEN is not 24.  Returns
    GPG_ERR_WEAK_KEY if weak key detection has not been disabled in the
    context flags and one of the three 8 byte subkeys is weak; the
    subkeys have been set up in that case as well.  Otherwise returns
    GPG_ERR_NO_ERROR.  */
 static gcry_err_code_t
 do_tripledes_setkey ( void *context, const byte *key, unsigned keylen,
                       cipher_bulk_ops_t *bulk_ops )
 {
   struct _tripledes_ctx *des3 = (struct _tripledes_ctx *) context;
   gcry_err_code_t rc = GPG_ERR_NO_ERROR;
 
   if (keylen != 24)
     return GPG_ERR_INV_KEYLEN;
 
   /* Start from an all-zero table and hook in the bulk routines.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
   bulk_ops->cbc_dec = _gcry_3des_cbc_dec;
   bulk_ops->cfb_dec = _gcry_3des_cfb_dec;
   bulk_ops->ctr_enc = _gcry_3des_ctr_enc;
 
   tripledes_set3keys (des3, key, key+8, key+16);
 
   /* The weak key test is skipped when detection has been disabled.  */
   if (!des3->flags.no_weak_key
       && (is_weak_key (key) || is_weak_key (key+8) || is_weak_key (key+16)))
     rc = GPG_ERR_WEAK_KEY;
 
   _gcry_burn_stack (64);
 
   return rc;
 }
 
 
 /* Handle an extra-info request WHAT for the Triple-DES CONTEXT.  Only
    CIPHER_INFO_NO_WEAK_KEY is supported; it sets the no_weak_key flag
    of the context.  BUFFER and BUFLEN are not used.  Returns 0 on
    success and GPG_ERR_INV_OP for any other request.  */
 static gcry_err_code_t
 do_tripledes_set_extra_info (void *context, int what,
                              const void *buffer, size_t buflen)
 {
   struct _tripledes_ctx *des3 = (struct _tripledes_ctx *)context;
 
   (void)buffer;
   (void)buflen;
 
   if (what != CIPHER_INFO_NO_WEAK_KEY)
     return GPG_ERR_INV_OP;
 
   des3->flags.no_weak_key = 1;
   return 0;
 }
 
 
 /* Encrypt the block at INBUF into OUTBUF using the Triple-DES CONTEXT.
    Returns TRIPLEDES_ECB_BURN_STACK as the burn_stack value.  */
 static unsigned int
 do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _tripledes_ctx *des3 = context;
 
   tripledes_ecb_encrypt (des3, inbuf, outbuf);
 
   return TRIPLEDES_ECB_BURN_STACK;
 }
 
 /* Decrypt the block at INBUF into OUTBUF using the Triple-DES CONTEXT.
    Returns TRIPLEDES_ECB_BURN_STACK as the burn_stack value.  */
 static unsigned int
 do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _tripledes_ctx *des3 = context;
 
   tripledes_ecb_decrypt (des3, inbuf, outbuf);
 
   return TRIPLEDES_ECB_BURN_STACK;
 }
 
 /* Install the 8 byte DES key KEY into CONTEXT.  BULK_OPS is not used.
    Returns GPG_ERR_INV_KEYLEN if KEYLEN is not 8, GPG_ERR_WEAK_KEY if
    the key is a weak key (the key has been set up in that case as
    well), and GPG_ERR_NO_ERROR otherwise.  */
 static gcry_err_code_t
 do_des_setkey (void *context, const byte *key, unsigned keylen,
                cipher_bulk_ops_t *bulk_ops)
 {
   struct _des_ctx *des = (struct _des_ctx *) context;
   gcry_err_code_t rc;
 
   (void)bulk_ops;
 
   if (keylen != 8)
     return GPG_ERR_INV_KEYLEN;
 
   des_setkey (des, key);
 
   rc = is_weak_key (key) ? GPG_ERR_WEAK_KEY : GPG_ERR_NO_ERROR;
   _gcry_burn_stack (64);
 
   return rc;
 }
 
 
 /* Encrypt the block at INBUF into OUTBUF using the DES CONTEXT.
    Returns 32 as the burn_stack value.  */
 static unsigned int
 do_des_encrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _des_ctx *des = context;
 
   des_ecb_encrypt (des, inbuf, outbuf);
 
   return 32;
 }
 
 /* Decrypt the block at INBUF into OUTBUF using the DES CONTEXT.
    Returns 32 as the burn_stack value.  */
 static unsigned int
 do_des_decrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _des_ctx *des = context;
 
   des_ecb_decrypt (des, inbuf, outbuf);
 
   return 32;
 }
 
 
 
 
 /*
      Self-test section.
  */
 
 
 /* Selftest for TripleDES.  Runs the low-level selftest() and, if it
    returns an error text and REPORT is not NULL, passes that text to
    REPORT.  EXTENDED is ignored because no extended tests exist.
    Returns 0 on success or GPG_ERR_SELFTEST_FAILED.  */
 static gpg_err_code_t
 selftest_fips (int extended, selftest_report_func_t report)
 {
   const char *what = "low-level";
   const char *errtxt;
 
   (void)extended; /* No extended tests available.  */
 
   /* The low-level self-tests are quite extensive and thus we can do
      without high level tests.  This is also justified because we have
      no custom block code implementation for 3des but always use the
      standard high level block code.  */
   errtxt = selftest ();
   if (!errtxt)
     return 0; /* Succeeded. */
 
   if (report)
     report ("cipher", GCRY_CIPHER_3DES, what, errtxt);
 
   return GPG_ERR_SELFTEST_FAILED;
 }
 
 
 
 /* Run a full self-test for ALGO and return 0 on success.  Only
    GCRY_CIPHER_3DES is handled here; any other ALGO yields
    GPG_ERR_CIPHER_ALGO.  */
 static gpg_err_code_t
 run_selftests (int algo, int extended, selftest_report_func_t report)
 {
   if (algo == GCRY_CIPHER_3DES)
     return selftest_fips (extended, report);
 
   return GPG_ERR_CIPHER_ALGO;
 }
 
 
 
 /* Cipher specification for single DES.  The initializer is positional
    and stops after the setkey/encrypt/decrypt handlers, so all members
    that follow them are left zero-initialized.
    NOTE(review): 8 and 64 are presumably the block size in bytes and
    the key length in bits - confirm against gcry_cipher_spec_t.  */
 gcry_cipher_spec_t _gcry_cipher_spec_des =
   {
     GCRY_CIPHER_DES, {0, 0},
     "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx),
     do_des_setkey, do_des_encrypt, do_des_decrypt
   };
 
 /* Object identifiers that select Triple-DES; each one is paired with
    CBC mode.  The list is terminated by an entry with a NULL OID.  */
 static const gcry_cipher_oid_spec_t oids_tripledes[] =
   {
     { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC },
     /* Teletrust specific OID for 3DES. */
     { "1.3.36.3.1.3.2.1",   GCRY_CIPHER_MODE_CBC },
     /* pbeWithSHAAnd3_KeyTripleDES_CBC */
     { "1.2.840.113549.1.12.1.3", GCRY_CIPHER_MODE_CBC },
     { NULL }
   };
 
 /* Cipher specification for Triple-DES.  In addition to the
    setkey/encrypt/decrypt handlers it registers the OID list, the
    selftest runner and the extra-info handler defined in this file.
    NOTE(review): 8 and 192 are presumably the block size in bytes and
    the key length in bits, and the two NULL members presumably are
    unused stream cipher hooks - confirm against gcry_cipher_spec_t.  */
 gcry_cipher_spec_t _gcry_cipher_spec_tripledes =
   {
     GCRY_CIPHER_3DES, {0, 0},
     "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx),
     do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt,
     NULL, NULL,
     run_selftests,
     do_tripledes_set_extra_info
   };
diff --git a/cipher/md2.c b/cipher/md2.c
index 9af9d697..220fc466 100644
--- a/cipher/md2.c
+++ b/cipher/md2.c
@@ -1,181 +1,181 @@
 /* md2.c - MD2 Message-Digest Algorithm
  * Copyright (C) 2014 Dmitry Eremin-Solenikov
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 #include "hash-common.h"
 
 /* Hashing state of one MD2 computation.  */
 typedef struct {
   gcry_md_block_ctx_t bctx; /* Block buffer state (buf, count, bwrite).  */
   unsigned char C[16];      /* Checksum bytes updated by transform_blk.  */
   unsigned char L;          /* Checksum byte written last; feeds the next
                                table lookup in transform_blk.  */
   unsigned char X[48];      /* State mixed by permute; md2_read returns a
                                pointer to it.  */
 } MD2_CONTEXT;
 
 /* Byte substitution table with 256 entries.  It is indexed with
    unsigned char values by permute and transform_blk.  */
 static const unsigned char S[] =
   {
     0x29, 0x2E, 0x43, 0xC9, 0xA2, 0xD8, 0x7C, 0x01,
     0x3D, 0x36, 0x54, 0xA1, 0xEC, 0xF0, 0x06, 0x13,
     0x62, 0xA7, 0x05, 0xF3, 0xC0, 0xC7, 0x73, 0x8C,
     0x98, 0x93, 0x2B, 0xD9, 0xBC, 0x4C, 0x82, 0xCA,
     0x1E, 0x9B, 0x57, 0x3C, 0xFD, 0xD4, 0xE0, 0x16,
     0x67, 0x42, 0x6F, 0x18, 0x8A, 0x17, 0xE5, 0x12,
     0xBE, 0x4E, 0xC4, 0xD6, 0xDA, 0x9E, 0xDE, 0x49,
     0xA0, 0xFB, 0xF5, 0x8E, 0xBB, 0x2F, 0xEE, 0x7A,
     0xA9, 0x68, 0x79, 0x91, 0x15, 0xB2, 0x07, 0x3F,
     0x94, 0xC2, 0x10, 0x89, 0x0B, 0x22, 0x5F, 0x21,
     0x80, 0x7F, 0x5D, 0x9A, 0x5A, 0x90, 0x32, 0x27,
     0x35, 0x3E, 0xCC, 0xE7, 0xBF, 0xF7, 0x97, 0x03,
     0xFF, 0x19, 0x30, 0xB3, 0x48, 0xA5, 0xB5, 0xD1,
     0xD7, 0x5E, 0x92, 0x2A, 0xAC, 0x56, 0xAA, 0xC6,
     0x4F, 0xB8, 0x38, 0xD2, 0x96, 0xA4, 0x7D, 0xB6,
     0x76, 0xFC, 0x6B, 0xE2, 0x9C, 0x74, 0x04, 0xF1,
     0x45, 0x9D, 0x70, 0x59, 0x64, 0x71, 0x87, 0x20,
     0x86, 0x5B, 0xCF, 0x65, 0xE6, 0x2D, 0xA8, 0x02,
     0x1B, 0x60, 0x25, 0xAD, 0xAE, 0xB0, 0xB9, 0xF6,
     0x1C, 0x46, 0x61, 0x69, 0x34, 0x40, 0x7E, 0x0F,
     0x55, 0x47, 0xA3, 0x23, 0xDD, 0x51, 0xAF, 0x3A,
     0xC3, 0x5C, 0xF9, 0xCE, 0xBA, 0xC5, 0xEA, 0x26,
     0x2C, 0x53, 0x0D, 0x6E, 0x85, 0x28, 0x84, 0x09,
     0xD3, 0xDF, 0xCD, 0xF4, 0x41, 0x81, 0x4D, 0x52,
     0x6A, 0xDC, 0x37, 0xC8, 0x6C, 0xC1, 0xAB, 0xFA,
     0x24, 0xE1, 0x7B, 0x08, 0x0C, 0xBD, 0xB1, 0x4A,
     0x78, 0x88, 0x95, 0x8B, 0xE3, 0x63, 0xE8, 0x6D,
     0xE9, 0xCB, 0xD5, 0xFE, 0x3B, 0x00, 0x1D, 0x39,
     0xF2, 0xEF, 0xB7, 0x0E, 0x66, 0x58, 0xD0, 0xE4,
     0xA6, 0x77, 0x72, 0xF8, 0xEB, 0x75, 0x4B, 0x0A,
     0x31, 0x44, 0x50, 0xB4, 0x8F, 0xED, 0x1F, 0x1A,
     0xDB, 0x99, 0x8D, 0x33, 0x9F, 0x11, 0x83, 0x14
 };
 
 
 /* Mix the 16 byte block BUF into the 48 byte state X.  BUF is copied
    to X[16..31], X[32..47] is set to X[16..31] xor X[0..15], and then
    18 passes over all 48 bytes chain each byte through the table S.  */
 static void
 permute (unsigned char *X, const unsigned char *buf)
 {
   unsigned char chain = 0;
   int pass, pos;
 
   memcpy (X+16, buf, 16);
   for (pos = 0; pos < 16; pos++)
     X[32+pos] = X[16+pos] ^ X[pos];
 
   for (pass = 0; pass < 18; pass++)
     {
       for (pos = 0; pos < 48; pos++)
         {
           X[pos] ^= S[chain];
           chain = X[pos];
         }
       chain += pass;  /* Wraps around as unsigned char arithmetic.  */
     }
 }
 
 
 /* Process the 16 byte block DATA for the MD2 context C: update the
    checksum bytes C[] and L, then mix the block into the state X.
    Returns the burn stack value.  */
 static unsigned int
 transform_blk (void *c, const unsigned char *data)
 {
   MD2_CONTEXT *hd = c;
   unsigned char *csum = hd->C;
   const unsigned char *src = data;
   const unsigned char *end = data + 16;
 
   /* Each checksum byte depends on the one written just before it.  */
   for (; src < end; src++, csum++)
     {
       *csum ^= S[*src ^ hd->L];
       hd->L = *csum;
     }
 
   permute (hd->X, data);
 
   return /* burn stack */ 4 + 5 * sizeof(void*);
 }
 
 
 /* Process NBLKS consecutive MD2 blocks starting at DATA for the
    context C.  An MD2 block is 16 bytes: transform_blk reads
    data[0..15] and md2_init sets blocksize_shift to _gcry_ctz(16), so
    the input pointer must advance by 16 per block.  NBLKS must be at
    least 1 because the loop body runs before the counter is tested.
    Returns the burn stack value of the last block.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks )
 {
   unsigned int burn;
 
   do
     {
       burn = transform_blk (c, data);
       data += 16;
     }
   while (--nblks);
 
   return burn;
 }
 
 
 /* Initialize the MD2 context CONTEXT: clear the whole structure, then
    set the block size shift for 16 byte blocks and install transform
    as the block write function.  FLAGS is ignored.  */
 static void
 md2_init (void *context, unsigned int flags)
 {
   MD2_CONTEXT *hd = context;
 
   (void)flags;
 
   memset (hd, 0, sizeof *hd);
   hd->bctx.bwrite = transform;
   hd->bctx.blocksize_shift = _gcry_ctz(16);
 }
 
 
 /* Finish the MD2 computation for CONTEXT.  The buffered data is padded
    up to 16 bytes with N bytes of value N, where N is 16 minus the
    number of buffered bytes, and processed as the last data block.
    After that the checksum C is mixed into the state as one more
    block.  The stack is burned afterwards, as the md4 and md5
    finalizers do.  */
 static void
 md2_final (void *context)
 {
   MD2_CONTEXT *hd = context;
   unsigned int burn;
 
   /* pad */
   memset (hd->bctx.buf + hd->bctx.count,
           16 - hd->bctx.count, 16 - hd->bctx.count);
   burn = transform_blk (hd, hd->bctx.buf);
   permute (hd->X, hd->C);
 
   _gcry_burn_stack (burn);
 }
 
 /* Return a pointer to the state array X of the MD2 context CONTEXT.  */
 static byte *
 md2_read (void *context)
 {
   return ((MD2_CONTEXT *) context)->X;
 }
 
 /* 18 byte ASN.1 prefix for MD2.
    NOTE(review): presumably the DER DigestInfo header that precedes
    the 16 byte digest - confirm against its users.  */
 static const byte asn[18] = /* Object ID is 1.2.840.113549.2.2 */
   { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
     0x86, 0xf7, 0x0d, 0x02, 0x02, 0x05, 0x00, 0x04, 0x10 };
 
 /* OIDs that identify MD2; the list ends with a NULL entry.  */
 static const gcry_md_oid_spec_t oid_spec_md2[] =
   {
     /* iso.member-body.us.rsadsi.digestAlgorithm.md2 */
     { "1.2.840.113549.2.2" },
     { NULL },
   };
 
 /* Digest specification for MD2.  It registers the handlers defined in
    this file together with the generic _gcry_md_block_write.
    NOTE(review): the initializer is positional; 16 presumably is the
    digest length in bytes - confirm against gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_md2 =
   {
     GCRY_MD_MD2, {0, 0},
     "MD2", asn, DIM (asn), oid_spec_md2, 16,
     md2_init, _gcry_md_block_write, md2_final, md2_read, NULL,
     NULL,
     sizeof (MD2_CONTEXT)
   };
diff --git a/cipher/md4.c b/cipher/md4.c
index 49b2af2a..530af54f 100644
--- a/cipher/md4.c
+++ b/cipher/md4.c
@@ -1,296 +1,296 @@
 /* md4.c - MD4 Message-Digest Algorithm
  * Copyright (C) 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Based on md5.c in libgcrypt, but rewritten to compute md4 checksums
  * using a public domain md4 implementation with the following comments:
  *
  * Modified by Wei Dai from Andrew M. Kuchling's md4.c
  * The original code and all modifications are in the public domain.
  *
  * This is the original introductory comment:
  *
  *  md4.c : MD4 hash algorithm.
  *
  * Part of the Python Cryptography Toolkit, version 1.1
  *
  * Distribute and use freely; there are no restrictions on further
  * dissemination and usage except those imposed by the laws of your
  * country of residence.
  *
  */
 
 /* MD4 test suite:
  * MD4 ("") = 31d6cfe0d16ae931b73c59d7e0c089c0
  * MD4 ("a") = bde52cb31de33e46245e05fbdbd6fb24
  * MD4 ("abc") = a448017aaf21d8525fc10ae87aa6729d
  * MD4 ("message digest") = d9130a8164549fe818874806e1c7014b
  * MD4 ("abcdefghijklmnopqrstuvwxyz") = d79e1c308aa5bbcdeea8ed63df412da9
  * MD4 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") =
  * 043f8582f241db351ce627e153e7f0e4
  * MD4 ("123456789012345678901234567890123456789012345678901234567890123456
  * 78901234567890") = e33b4ddc9c38f2199c3e7b164fcc0536
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 #include "hash-common.h"
 
 
 /* Hashing state of one MD4 computation.  */
 typedef struct {
     gcry_md_block_ctx_t bctx; /* Block buffer state (buf, count, nblocks).  */
     u32 A,B,C,D;	  /* chaining variables */
 } MD4_CONTEXT;
 
 /* Forward declaration; md4_init installs this as bctx.bwrite.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks );
 
 /* Initialize the MD4 context CONTEXT: load the four chaining
    variables with their start values, reset the block counters and the
    buffer fill count, and set up 64 byte blocks processed by
    transform.  FLAGS is ignored.  */
 static void
 md4_init (void *context, unsigned int flags)
 {
   MD4_CONTEXT *hd = context;
 
   (void)flags;
 
   /* Block buffering state.  */
   hd->bctx.nblocks = 0;
   hd->bctx.nblocks_high = 0;
   hd->bctx.count = 0;
   hd->bctx.blocksize_shift = _gcry_ctz(64);
   hd->bctx.bwrite = transform;
 
   /* Chaining variables.  */
   hd->A = 0x67452301;
   hd->B = 0xefcdab89;
   hd->C = 0x98badcfe;
   hd->D = 0x10325476;
 }
 
 /* The three MD4 round functions.  F yields, bit by bit, y where x is
    set and z where x is clear; G is the bitwise majority of its three
    arguments; H is their bitwise xor.  */
 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
 #define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
 #define H(x, y, z) ((x) ^ (y) ^ (z))
 
 
 /****************
  * transform 64 bytes
  *
  * Process the 64 byte block DATA for the MD4 context C.  The block is
  * read as 16 little endian 32 bit words, three rounds of 16 steps
  * each are applied to copies of the chaining variables, and the
  * results are added back into the context.  Returns the burn stack
  * value.
  */
 static unsigned int
 transform_blk ( void *c, const unsigned char *data )
 {
   MD4_CONTEXT *ctx = c;
   u32 in[16];
   register u32 A = ctx->A;
   register u32 B = ctx->B;
   register u32 C = ctx->C;
   register u32 D = ctx->D;
   int i;
 
   for ( i = 0; i < 16; i++ )
     in[i] = buf_get_le32(data + i * 4);
 
   /* Round 1.  */
 #define function(a,b,c,d,k,s) a=rol(a+F(b,c,d)+in[k],s);
   function(A,B,C,D, 0, 3);
   function(D,A,B,C, 1, 7);
   function(C,D,A,B, 2,11);
   function(B,C,D,A, 3,19);
   function(A,B,C,D, 4, 3);
   function(D,A,B,C, 5, 7);
   function(C,D,A,B, 6,11);
   function(B,C,D,A, 7,19);
   function(A,B,C,D, 8, 3);
   function(D,A,B,C, 9, 7);
   function(C,D,A,B,10,11);
   function(B,C,D,A,11,19);
   function(A,B,C,D,12, 3);
   function(D,A,B,C,13, 7);
   function(C,D,A,B,14,11);
   function(B,C,D,A,15,19);
 
 #undef function
 
   /* Round 2.  */
 #define function(a,b,c,d,k,s) a=rol(a+G(b,c,d)+in[k]+0x5a827999,s);
 
   function(A,B,C,D, 0, 3);
   function(D,A,B,C, 4, 5);
   function(C,D,A,B, 8, 9);
   function(B,C,D,A,12,13);
   function(A,B,C,D, 1, 3);
   function(D,A,B,C, 5, 5);
   function(C,D,A,B, 9, 9);
   function(B,C,D,A,13,13);
   function(A,B,C,D, 2, 3);
   function(D,A,B,C, 6, 5);
   function(C,D,A,B,10, 9);
   function(B,C,D,A,14,13);
   function(A,B,C,D, 3, 3);
   function(D,A,B,C, 7, 5);
   function(C,D,A,B,11, 9);
   function(B,C,D,A,15,13);
 
 #undef function
 
   /* Round 3.  */
 #define function(a,b,c,d,k,s) a=rol(a+H(b,c,d)+in[k]+0x6ed9eba1,s);
 
   function(A,B,C,D, 0, 3);
   function(D,A,B,C, 8, 9);
   function(C,D,A,B, 4,11);
   function(B,C,D,A,12,15);
   function(A,B,C,D, 2, 3);
   function(D,A,B,C,10, 9);
   function(C,D,A,B, 6,11);
   function(B,C,D,A,14,15);
   function(A,B,C,D, 1, 3);
   function(D,A,B,C, 9, 9);
   function(C,D,A,B, 5,11);
   function(B,C,D,A,13,15);
   function(A,B,C,D, 3, 3);
   function(D,A,B,C,11, 9);
   function(C,D,A,B, 7,11);
   function(B,C,D,A,15,15);
 
   /* Do not let the step helper leak into the rest of the file.  */
 #undef function
 
   /* Put checksum in context given as argument.  */
   ctx->A += A;
   ctx->B += B;
   ctx->C += C;
   ctx->D += D;
 
   return /*burn_stack*/ 80+6*sizeof(void*);
 }
 
 
 /* Process NBLKS consecutive 64 byte blocks starting at DATA for the
    MD4 context C.  The first block is always processed before the
    counter is tested.  Returns the burn stack value of the last
    block.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks )
 {
   unsigned int burn;
 
   for (;;)
     {
       burn = transform_blk (c, data);
       data += 64;
       if (!--nblks)
         break;
     }
 
   return burn;
 }
 
 
 /* The routine final terminates the message-digest computation.  It
  * appends the padding and the 64 bit message length in bits to the
  * buffered data, processes the resulting one or two blocks, and then
  * stores the chaining variables A, B, C and D as little endian 32 bit
  * words in the first 16 bytes of bctx.buf, which is the buffer that
  * md4_read returns.  bctx.count is reset to 0 afterwards.
  */
 
 static void
 md4_final( void *context )
 {
   MD4_CONTEXT *hd = context;
   u32 t, th, msb, lsb;
   byte *p;
   unsigned int burn;
 
   /* T and TH receive the low and high 32 bits of the block count.
      The high part comes from nblocks_high when nblocks has the size
      of a u32, and from the upper half of nblocks otherwise.  */
   t = hd->bctx.nblocks;
   if (sizeof t == sizeof hd->bctx.nblocks)
     th = hd->bctx.nblocks_high;
   else
     th = hd->bctx.nblocks >> 32;
 
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
   msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
   if( (lsb += hd->bctx.count) < t )
     msb++;  /* the addition wrapped around: carry into the high word */
   /* multiply by 8 to make a bit count */
   t = lsb;
   lsb <<= 3;
   msb <<= 3;
   msb |= t >> 29;
 
   if (hd->bctx.count < 56)  /* enough room */
     {
       hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
       if (hd->bctx.count < 56)
 	memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 56, lsb);
       buf_put_le32(hd->bctx.buf + 60, msb);
       burn = transform (hd, hd->bctx.buf, 1);
     }
   else /* need one extra block */
     {
       hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
       /* fill pad and next block with zeroes */
       /* NOTE(review): this writes up to offset 127 of bctx.buf, so the
          buffer is assumed to hold at least 128 bytes - confirm against
          gcry_md_block_ctx_t.  */
       memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
       buf_put_le32(hd->bctx.buf + 64 + 60, msb);
       burn = transform (hd, hd->bctx.buf, 2);
     }
 
   /* Store the digest at the start of the block buffer.  */
   p = hd->bctx.buf;
 #define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
   X(A);
   X(B);
   X(C);
   X(D);
 #undef X
 
   hd->bctx.count = 0;
 
   _gcry_burn_stack (burn);
 }
 
 static byte *
 md4_read (void *context)
 {
   MD4_CONTEXT *hd = context;
   return hd->bctx.buf;
 }
 
 /* 18 byte ASN.1 prefix for MD4.
    NOTE(review): presumably the DER DigestInfo header that precedes
    the 16 byte digest - confirm against its users.  */
 static const byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */
   { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
     0x86, 0xf7, 0x0d, 0x02, 0x04, 0x05, 0x00, 0x04, 0x10 };
 
 /* OIDs that identify MD4; the list ends with a NULL entry.  */
 static const gcry_md_oid_spec_t oid_spec_md4[] =
   {
     /* iso.member-body.us.rsadsi.digestAlgorithm.md4 */
     { "1.2.840.113549.2.4" },
     { NULL },
   };
 
 /* Digest specification for MD4.  It registers the handlers defined in
    this file together with the generic _gcry_md_block_write.
    NOTE(review): the initializer is positional; 16 presumably is the
    digest length in bytes - confirm against gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_md4 =
   {
     GCRY_MD_MD4, {0, 0},
     "MD4", asn, DIM (asn), oid_spec_md4,16,
     md4_init, _gcry_md_block_write, md4_final, md4_read, NULL,
     NULL,
     sizeof (MD4_CONTEXT)
   };
diff --git a/cipher/md5.c b/cipher/md5.c
index 744a2cc1..b807da55 100644
--- a/cipher/md5.c
+++ b/cipher/md5.c
@@ -1,322 +1,322 @@
 /* md5.c - MD5 Message-Digest Algorithm
  * Copyright (C) 1995,1996,1998,1999,2001,2002,
  *               2003  Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * According to the definition of MD5 in RFC 1321 from April 1992.
  * NOTE: This is *not* the same file as the one from glibc.
  * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
  * heavily modified for GnuPG by Werner Koch <wk@gnupg.org>
  */
 
 /* Test values:
  * ""                  D4 1D 8C D9 8F 00 B2 04  E9 80 09 98 EC F8 42 7E
  * "a"                 0C C1 75 B9 C0 F1 B6 A8  31 C3 99 E2 69 77 26 61
  * "abc"               90 01 50 98 3C D2 4F B0  D6 96 3F 7D 28 E1 7F 72
  * "message digest"    F9 6B 69 7D 7C B7 93 8D  52 5A 2F 31 AA F1 61 D0
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 
 #include "bithelp.h"
 #include "bufhelp.h"
 #include "hash-common.h"
 
 
 /* Hashing state of one MD5 computation.  */
 typedef struct {
     gcry_md_block_ctx_t bctx; /* Block buffer state (buf, count, nblocks).  */
     u32 A,B,C,D;	  /* chaining variables */
 } MD5_CONTEXT;
 
 /* Forward declaration; md5_init installs this as bctx.bwrite.  The
    third argument is a count of 64 byte blocks, not a byte length.  */
 static unsigned int
 transform ( void *ctx, const unsigned char *data, size_t nblks );
 
 /* Initialize the MD5 context CONTEXT: load the four chaining
    variables with their start values, reset the block counters and the
    buffer fill count, and set up 64 byte blocks processed by
    transform.  FLAGS is ignored.  */
 static void
 md5_init( void *context, unsigned int flags)
 {
   MD5_CONTEXT *hd = context;
 
   (void)flags;
 
   /* Block buffering state.  */
   hd->bctx.nblocks = 0;
   hd->bctx.nblocks_high = 0;
   hd->bctx.count = 0;
   hd->bctx.blocksize_shift = _gcry_ctz(64);
   hd->bctx.bwrite = transform;
 
   /* Chaining variables.  */
   hd->A = 0x67452301;
   hd->B = 0xefcdab89;
   hd->C = 0x98badcfe;
   hd->D = 0x10325476;
 }
 
 
 /* These are the four functions used in the four steps of the MD5 algorithm
    and defined in the RFC 1321.  The first function is a little bit optimized
    (as found in Colin Plumb's public domain implementation).  FG is FF
    with its arguments rotated.  The macro arguments are not
    parenthesized, so only simple operands may be passed.  */
 /* #define FF(b, c, d) ((b & c) | (~b & d)) */
 #define FF(b, c, d) (d ^ (b & (c ^ d)))
 #define FG(b, c, d) FF (d, b, c)
 #define FH(b, c, d) (b ^ c ^ d)
 #define FI(b, c, d) (c ^ (b | ~d))
 
 
 /****************
  * transform 64 bytes
  *
  * Process the 64 byte block DATA for the MD5 context C.  The block is
  * read as 16 little endian 32 bit words, four rounds of 16 steps each
  * are applied to copies of the chaining variables, and the results
  * are added back into the context.  Returns the burn stack value.
  */
 static unsigned int
 transform_blk ( void *c, const unsigned char *data )
 {
   MD5_CONTEXT *ctx = c;
   u32 correct_words[16];
   register u32 A = ctx->A;
   register u32 B = ctx->B;
   register u32 C = ctx->C;
   register u32 D = ctx->D;
   u32 *cwp = correct_words;
   int i;
 
   for ( i = 0; i < 16; i++ )
     correct_words[i] = buf_get_le32(data + i * 4);
 
   /* Round 1 takes the input words in order through CWP.  */
 #define OP(a, b, c, d, s, T) \
   do                                       \
     {                                      \
       a += FF (b, c, d) + (*cwp++) + T;    \
       a = rol(a, s);                       \
       a += b;                              \
     }                                      \
   while (0)
 
   /* Before we start, one word about the strange constants.
      They are defined in RFC 1321 as
 
      T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
   */
 
   /* Round 1.  */
   OP (A, B, C, D,  7, 0xd76aa478);
   OP (D, A, B, C, 12, 0xe8c7b756);
   OP (C, D, A, B, 17, 0x242070db);
   OP (B, C, D, A, 22, 0xc1bdceee);
   OP (A, B, C, D,  7, 0xf57c0faf);
   OP (D, A, B, C, 12, 0x4787c62a);
   OP (C, D, A, B, 17, 0xa8304613);
   OP (B, C, D, A, 22, 0xfd469501);
   OP (A, B, C, D,  7, 0x698098d8);
   OP (D, A, B, C, 12, 0x8b44f7af);
   OP (C, D, A, B, 17, 0xffff5bb1);
   OP (B, C, D, A, 22, 0x895cd7be);
   OP (A, B, C, D,  7, 0x6b901122);
   OP (D, A, B, C, 12, 0xfd987193);
   OP (C, D, A, B, 17, 0xa679438e);
   OP (B, C, D, A, 22, 0x49b40821);
 
   /* Rounds 2 to 4 name the round function F and the word index K.  */
 #undef OP
 #define OP(f, a, b, c, d, k, s, T)  \
     do                                             \
       {                                            \
         a += f (b, c, d) + correct_words[k] + T;   \
         a = rol(a, s);                             \
         a += b;                                    \
       }                                            \
     while (0)
 
   /* Round 2.  */
   OP (FG, A, B, C, D,  1,  5, 0xf61e2562);
   OP (FG, D, A, B, C,  6,  9, 0xc040b340);
   OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
   OP (FG, B, C, D, A,  0, 20, 0xe9b6c7aa);
   OP (FG, A, B, C, D,  5,  5, 0xd62f105d);
   OP (FG, D, A, B, C, 10,  9, 0x02441453);
   OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
   OP (FG, B, C, D, A,  4, 20, 0xe7d3fbc8);
   OP (FG, A, B, C, D,  9,  5, 0x21e1cde6);
   OP (FG, D, A, B, C, 14,  9, 0xc33707d6);
   OP (FG, C, D, A, B,  3, 14, 0xf4d50d87);
   OP (FG, B, C, D, A,  8, 20, 0x455a14ed);
   OP (FG, A, B, C, D, 13,  5, 0xa9e3e905);
   OP (FG, D, A, B, C,  2,  9, 0xfcefa3f8);
   OP (FG, C, D, A, B,  7, 14, 0x676f02d9);
   OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
 
   /* Round 3.  */
   OP (FH, A, B, C, D,  5,  4, 0xfffa3942);
   OP (FH, D, A, B, C,  8, 11, 0x8771f681);
   OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
   OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
   OP (FH, A, B, C, D,  1,  4, 0xa4beea44);
   OP (FH, D, A, B, C,  4, 11, 0x4bdecfa9);
   OP (FH, C, D, A, B,  7, 16, 0xf6bb4b60);
   OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
   OP (FH, A, B, C, D, 13,  4, 0x289b7ec6);
   OP (FH, D, A, B, C,  0, 11, 0xeaa127fa);
   OP (FH, C, D, A, B,  3, 16, 0xd4ef3085);
   OP (FH, B, C, D, A,  6, 23, 0x04881d05);
   OP (FH, A, B, C, D,  9,  4, 0xd9d4d039);
   OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
   OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
   OP (FH, B, C, D, A,  2, 23, 0xc4ac5665);
 
   /* Round 4.  */
   OP (FI, A, B, C, D,  0,  6, 0xf4292244);
   OP (FI, D, A, B, C,  7, 10, 0x432aff97);
   OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
   OP (FI, B, C, D, A,  5, 21, 0xfc93a039);
   OP (FI, A, B, C, D, 12,  6, 0x655b59c3);
   OP (FI, D, A, B, C,  3, 10, 0x8f0ccc92);
   OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
   OP (FI, B, C, D, A,  1, 21, 0x85845dd1);
   OP (FI, A, B, C, D,  8,  6, 0x6fa87e4f);
   OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
   OP (FI, C, D, A, B,  6, 15, 0xa3014314);
   OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
   OP (FI, A, B, C, D,  4,  6, 0xf7537e82);
   OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
   OP (FI, C, D, A, B,  2, 15, 0x2ad7d2bb);
   OP (FI, B, C, D, A,  9, 21, 0xeb86d391);
 
   /* Do not let the step helper leak into the rest of the file.  */
 #undef OP
 
   /* Put checksum in context given as argument.  */
   ctx->A += A;
   ctx->B += B;
   ctx->C += C;
   ctx->D += D;
 
   return /*burn_stack*/ 80+6*sizeof(void*);
 }
 
 
 /* Process NBLKS consecutive 64 byte blocks starting at DATA for the
    MD5 context C.  The first block is always processed before the
    counter is tested.  Returns the burn stack value of the last
    block.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks )
 {
   unsigned int burn;
 
   for (;;)
     {
       burn = transform_blk (c, data);
       data += 64;
       if (!--nblks)
         break;
     }
 
   return burn;
 }
 
 
 /* The routine final terminates the message-digest computation.  It
  * appends the padding and the 64 bit message length in bits to the
  * buffered data, processes the resulting one or two blocks, and then
  * stores the chaining variables A, B, C and D as little endian 32 bit
  * words in the first 16 bytes of bctx.buf, which is the buffer that
  * md5_read returns.  bctx.count is reset to 0 afterwards.
  */
 
 static void
 md5_final( void *context)
 {
   MD5_CONTEXT *hd = context;
   u32 t, th, msb, lsb;
   byte *p;
   unsigned int burn;
 
   /* T and TH receive the low and high 32 bits of the block count.
      The high part comes from nblocks_high when nblocks has the size
      of a u32, and from the upper half of nblocks otherwise.  */
   t = hd->bctx.nblocks;
   if (sizeof t == sizeof hd->bctx.nblocks)
     th = hd->bctx.nblocks_high;
   else
     th = hd->bctx.nblocks >> 32;
 
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
   msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
   if( (lsb += hd->bctx.count) < t )
     msb++;  /* the addition wrapped around: carry into the high word */
   /* multiply by 8 to make a bit count */
   t = lsb;
   lsb <<= 3;
   msb <<= 3;
   msb |= t >> 29;
 
   if (hd->bctx.count < 56)  /* enough room */
     {
       hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
       if (hd->bctx.count < 56)
 	memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 56, lsb);
       buf_put_le32(hd->bctx.buf + 60, msb);
       burn = transform (hd, hd->bctx.buf, 1);
     }
   else /* need one extra block */
     {
       hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
       /* fill pad and next block with zeroes */
       /* NOTE(review): this writes up to offset 127 of bctx.buf, so the
          buffer is assumed to hold at least 128 bytes - confirm against
          gcry_md_block_ctx_t.  */
       memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
       buf_put_le32(hd->bctx.buf + 64 + 60, msb);
       burn = transform (hd, hd->bctx.buf, 2);
     }
 
   /* Store the digest at the start of the block buffer.  */
   p = hd->bctx.buf;
 #define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
   X(A);
   X(B);
   X(C);
   X(D);
 #undef X
 
   hd->bctx.count = 0;
 
   _gcry_burn_stack (burn);
 }
 
 /* Return a pointer to the block buffer of the MD5 context CONTEXT;
    md5_final stores the digest at its start.  */
 static byte *
 md5_read( void *context )
 {
   return ((MD5_CONTEXT *) context)->bctx.buf;
 }
 
 /* 18 byte ASN.1 prefix for MD5.
    NOTE(review): presumably the DER DigestInfo header that precedes
    the 16 byte digest - confirm against its users.  */
 static const byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */
   { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
     0x86, 0xf7, 0x0d, 0x02, 0x05, 0x05, 0x00, 0x04, 0x10 };
 
 /* OIDs that identify MD5; the list ends with a NULL entry.  */
 static const gcry_md_oid_spec_t oid_spec_md5[] =
   {
     /* iso.member-body.us.rsadsi.pkcs.pkcs-1.4 (md5WithRSAEncryption) */
     { "1.2.840.113549.1.1.4" },
     /* RSADSI digestAlgorithm MD5 */
     { "1.2.840.113549.2.5" },
     { NULL },
   };
 
 /* Digest specification for MD5.  It registers the handlers defined in
    this file together with the generic _gcry_md_block_write.
    NOTE(review): the initializer is positional; 16 presumably is the
    digest length in bytes, and the {0, 1} flags differ from the {0, 0}
    used for MD2 and MD4 - confirm their meaning against
    gcry_md_spec_t.  */
 const gcry_md_spec_t _gcry_digest_spec_md5 =
   {
     GCRY_MD_MD5, {0, 1},
     "MD5", asn, DIM (asn), oid_spec_md5, 16,
     md5_init, _gcry_md_block_write, md5_final, md5_read, NULL,
     NULL,
     sizeof (MD5_CONTEXT)
   };
diff --git a/cipher/primegen.c b/cipher/primegen.c
index 25ebaf26..57bb6bec 100644
--- a/cipher/primegen.c
+++ b/cipher/primegen.c
@@ -1,1880 +1,1880 @@
 /* primegen.c - prime number generator
  * Copyright (C) 1998, 2000, 2001, 2002, 2003
  *               2004, 2008 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
 
 /* Prototypes of local functions which are used before they are
    defined.  */
 static gcry_mpi_t gen_prime (unsigned int nbits, int secret, int randomlevel,
                              int (*extra_check)(void *, gcry_mpi_t),
                              void *extra_check_arg);
 static int check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds,
                         gcry_prime_check_func_t cb_func, void *cb_arg );
 static int is_prime (gcry_mpi_t n, int steps, unsigned int *count);
 static void m_out_of_n( char *array, int m, int n );
 
 /* The progress callback and the opaque value passed to it as first
    argument.  Both are set by _gcry_register_primegen_progress and
    used by progress().  */
 static void (*progress_cb) (void *,const char*,int,int, int );
 static void *progress_cb_data;
 
 /* Note: 2 is not included because it can be tested more easily by
    looking at bit 0.  The last entry in this list is marked by a zero.  */
 static ushort small_prime_numbers[] = {
     3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
     47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101,
     103, 107, 109, 113, 127, 131, 137, 139, 149, 151,
     157, 163, 167, 173, 179, 181, 191, 193, 197, 199,
     211, 223, 227, 229, 233, 239, 241, 251, 257, 263,
     269, 271, 277, 281, 283, 293, 307, 311, 313, 317,
     331, 337, 347, 349, 353, 359, 367, 373, 379, 383,
     389, 397, 401, 409, 419, 421, 431, 433, 439, 443,
     449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
     509, 521, 523, 541, 547, 557, 563, 569, 571, 577,
     587, 593, 599, 601, 607, 613, 617, 619, 631, 641,
     643, 647, 653, 659, 661, 673, 677, 683, 691, 701,
     709, 719, 727, 733, 739, 743, 751, 757, 761, 769,
     773, 787, 797, 809, 811, 821, 823, 827, 829, 839,
     853, 857, 859, 863, 877, 881, 883, 887, 907, 911,
     919, 929, 937, 941, 947, 953, 967, 971, 977, 983,
     991, 997, 1009, 1013, 1019, 1021, 1031, 1033,
     1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091,
     1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151,
     1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213,
     1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277,
     1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307,
     1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399,
     1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451,
     1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493,
     1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559,
     1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609,
     1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667,
     1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733,
     1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789,
     1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871,
     1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931,
     1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997,
     1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053,
     2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111,
     2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161,
     2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243,
     2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297,
     2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357,
     2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411,
     2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473,
     2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551,
     2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633,
     2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687,
     2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729,
     2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791,
     2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851,
     2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917,
     2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999,
     3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061,
     3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137,
     3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209,
     3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271,
     3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331,
     3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391,
     3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467,
     3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533,
     3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583,
     3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643,
     3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709,
     3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779,
     3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851,
     3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917,
     3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989,
     4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049,
     4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111,
     4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177,
     4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243,
     4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297,
     4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391,
     4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457,
     4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519,
     4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597,
     4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657,
     4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729,
     4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799,
     4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889,
     4903, 4909, 4919, 4931, 4933, 4937, 4943, 4951,
     4957, 4967, 4969, 4973, 4987, 4993, 4999,
     0
 };
 /* Number of entries in small_prime_numbers without the terminating
    zero (assuming that DIM yields the number of array elements).  */
 static int no_of_small_prime_numbers = DIM (small_prime_numbers) - 1;
 
 
 
 /* An object and a list to build up a global pool of primes.  See
    save_pool_prime and get_pool_prime. */
 struct primepool_s
 {
   struct primepool_s *next;  /* Next item of the singly linked list. */
   gcry_mpi_t prime;      /* If this is NULL the entry is not used. */
   unsigned int nbits;    /* Bit length of PRIME as set by save_pool_prime. */
   gcry_random_level_t randomlevel; /* Level PRIME has been generated at. */
 };
 /* Head of the list; save_pool_prime inserts new items at the front.  */
 struct primepool_s *primepool;
 /* Mutex used to protect access to the primepool.  */
 GPGRT_LOCK_DEFINE (primepool_lock);
 
 
 /* Initialize the prime generator.  There is nothing left to do: the
    primepool mutex this function formerly had to set up has been
    replaced by a static initialization.  Always returns 0.  */
 gcry_err_code_t
 _gcry_primegen_init (void)
 {
   return 0;
 }
 
 
 /* Save PRIME which has been generated at RANDOMLEVEL for later
    use.  Needs to be called while primepool_lock is being held.  Note
    that PRIME should be considered released after calling this
    function: it is either taken over by the pool or, if no list item
    could be allocated, released right away.  */
 static void
 save_pool_prime (gcry_mpi_t prime, gcry_random_level_t randomlevel)
 {
   struct primepool_s *item, *item2;
   size_t n;
 
   /* Look for an unused item and count the items passed on the way.  */
   for (n=0, item = primepool; item; item = item->next, n++)
     if (!item->prime)
       break;
   if (!item && n > 100)
     {
       /* All items are in use and the list has grown too long.
          Remove some of the entries.  Our strategy is removing the
          last third from the list.  The items themselves are kept and
          only marked as unused; the first of them takes PRIME.  */
       size_t i;
 
       /* The index I must advance together with ITEM2; otherwise the
          threshold is never reached and nothing is ever removed.  */
       for (i=0, item2 = primepool; item2; item2 = item2->next, i++)
         {
           if (i >= n/3*2)
             {
               _gcry_mpi_release (item2->prime);
               item2->prime = NULL;
               if (!item)
                 item = item2;
             }
         }
     }
   if (!item)
     {
       /* No unused item available: Put a new one in front of the list.  */
       item = xtrycalloc (1, sizeof *item);
       if (!item)
         {
           /* Out of memory.  Silently giving up. */
           _gcry_mpi_release (prime);
           return;
         }
       item->next = primepool;
       primepool = item;
     }
   item->prime = prime;
   item->nbits = mpi_get_nbits (prime);
   item->randomlevel = randomlevel;
 }
 
 
 /* Take a prime which has exactly NBITS bits and has been generated
    at RANDOMLEVEL out of the prime pool and return it.  The pool item
    is marked as unused, thus the caller becomes the owner of the
    prime.  Returns NULL if no matching prime has been found.  This
    function needs to be called while primepool_lock is being held.  */
 static gcry_mpi_t
 get_pool_prime (unsigned int nbits, gcry_random_level_t randomlevel)
 {
   struct primepool_s *entry = primepool;
   gcry_mpi_t found;
 
   while (entry)
     {
       if (!entry->prime
           || entry->nbits != nbits || entry->randomlevel != randomlevel)
         {
           /* Unused or not what we are looking for.  */
           entry = entry->next;
           continue;
         }
 
       found = entry->prime;
       entry->prime = NULL;
       gcry_assert (nbits == mpi_get_nbits (found));
       return found;
     }
 
   return NULL;
 }
 
 
 
 
 
 
 /* Register CB as the progress callback of the prime generator.
    CB_DATA is stored along with it and passed as first argument to
    each call of CB.  */
 void
 _gcry_register_primegen_progress (void (*cb) (void *, const char *,
                                               int, int, int),
                                   void *cb_data)
 {
   progress_cb = cb;
   progress_cb_data = cb_data;
 }
 
 
 /* Pass the progress character C on to the registered callback.
    This is a no-op if no callback has been registered.  */
 static void
 progress( int c )
 {
   if ( !progress_cb )
     return;
 
   progress_cb ( progress_cb_data, "primegen", c, 0, 0 );
 }
 
 
 /****************
  * Generate a prime number of NBITS bits (stored in secure memory)
  * using random of RANDOM_LEVEL.  EXTRA_CHECK and EXTRA_CHECK_ARG are
  * passed on to gen_prime.  A '\n' progress event is emitted before
  * the prime is returned.
  */
 gcry_mpi_t
 _gcry_generate_secret_prime (unsigned int nbits,
                              gcry_random_level_t random_level,
                              int (*extra_check)(void*, gcry_mpi_t),
                              void *extra_check_arg)
 {
   gcry_mpi_t result = gen_prime (nbits, 1, random_level,
                                  extra_check, extra_check_arg);
 
   progress('\n');
   return result;
 }
 
 
 /* Generate a prime number of NBITS bits which may be public,
    i.e. it is not allocated in secure memory.  RANDOM_LEVEL,
    EXTRA_CHECK and EXTRA_CHECK_ARG are passed on to gen_prime.  A
    '\n' progress event is emitted before the prime is returned.  */
 gcry_mpi_t
 _gcry_generate_public_prime (unsigned int nbits,
                              gcry_random_level_t random_level,
                              int (*extra_check)(void*, gcry_mpi_t),
                              void *extra_check_arg)
 {
   gcry_mpi_t result = gen_prime (nbits, 0, random_level,
                                  extra_check, extra_check_arg);
 
   progress('\n');
   return result;
 }
 
 
 /* Core prime generation function.  The algorithm used to generate
    practically safe primes is due to Lim and Lee as described in the
    CRYPTO '97 proceedings (ISBN3540633847) page 260.
 
    NEED_Q_FACTOR: If true make sure that at least one factor is of
                   size qbits.  This is for example required for DSA.
    PRIME_GENERATED: Address of a variable where the resulting prime
                     number will be stored.
    PBITS: Requested size of the prime number.  At least 48.
    QBITS: One factor of the prime needs to be of this size.  May be 0
           if this is not required.  See also NEED_Q_FACTOR.
    G: If not NULL an MPI which will receive a generator for the prime
       for use with Elgamal.
    RET_FACTORS: if not NULL, an array with all factors are stored at
                 that address.
    ALL_FACTORS: If set to true all factors of prime-1 are returned.
    RANDOMLEVEL:  How strong should the random numbers be.
    FLAGS: Prime generation bit flags. Currently supported:
           GCRY_PRIME_FLAG_SECRET - The prime needs to be kept secret.
    CB_FUNC, CB_ARG:  Callback to be used for extra checks.
 
    Returns 0 on success.  On error an error code is returned,
    nothing is stored at PRIME_GENERATED or RET_FACTORS and the prime
    candidate as well as the factor array are released.
  */
 static gcry_err_code_t
 prime_generate_internal (int need_q_factor,
                          gcry_mpi_t *prime_generated, unsigned int pbits,
                          unsigned int qbits, gcry_mpi_t g,
                          gcry_mpi_t **ret_factors,
                          gcry_random_level_t randomlevel, unsigned int flags,
                          int all_factors,
                          gcry_prime_check_func_t cb_func, void *cb_arg)
 {
   gcry_err_code_t err = 0;
   gcry_mpi_t *factors_new = NULL; /* Factors to return to the
                                      caller.  */
   gcry_mpi_t *factors = NULL;   /* Current factors.  */
   gcry_random_level_t poolrandomlevel; /* Random level used for pool primes. */
   gcry_mpi_t *pool = NULL;      /* Pool of primes.  */
   int *pool_in_use = NULL;      /* Array with currently used POOL elements. */
   unsigned char *perms = NULL;  /* Permutations of POOL.  */
   gcry_mpi_t q_factor = NULL;   /* Used if QBITS is non-zero.  */
   unsigned int fbits = 0;       /* Length of prime factors.  */
   unsigned int n = 0;           /* Number of factors.  */
   unsigned int m = 0;           /* Number of primes in pool.  */
   gcry_mpi_t q = NULL;          /* First prime factor.  */
   gcry_mpi_t prime = NULL;      /* Prime candidate.  */
   unsigned int nprime = 0;      /* Bits of PRIME.  */
   unsigned int req_qbits;       /* The original QBITS value.  */
   gcry_mpi_t val_2;             /* For check_prime().  */
   int is_locked = 0;            /* Flag to help unlocking the primepool. */
   unsigned int is_secret = (flags & GCRY_PRIME_FLAG_SECRET);
   unsigned int count1 = 0, count2 = 0;
   unsigned int i = 0, j = 0;
 
   if (pbits < 48)
     return GPG_ERR_INV_ARG;
 
   /* We won't use a too strong random level for the pooled subprimes. */
   poolrandomlevel = (randomlevel > GCRY_STRONG_RANDOM?
                      GCRY_STRONG_RANDOM : randomlevel);
 
 
   /* If QBITS is not given, assume a reasonable value. */
   if (!qbits)
     qbits = pbits / 3;
 
   req_qbits = qbits;
 
   /* Find number of needed prime factors N.  */
   for (n = 1; (pbits - qbits - 1) / n  >= qbits; n++)
     ;
   n--;
 
   val_2 = mpi_alloc_set_ui (2);
 
   if ((! n) || ((need_q_factor) && (n < 2)))
     {
       err = GPG_ERR_INV_ARG;
       goto leave;
     }
 
   if (need_q_factor)
     {
       n--;  /* Need one factor less because we want a specific Q-FACTOR. */
       fbits = (pbits - 2 * req_qbits -1) / n;
       qbits =  pbits - req_qbits - n * fbits;
     }
   else
     {
       fbits = (pbits - req_qbits -1) / n;
       qbits = pbits - n * fbits;
     }
 
   if (DBG_CIPHER)
     log_debug ("gen prime: pbits=%u qbits=%u fbits=%u/%u n=%d\n",
                pbits, req_qbits, qbits, fbits, n);
 
   /* Allocate an integer to hold the new prime. */
   prime = mpi_new (pbits);
 
   /* Generate first prime factor.  */
   q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
 
   /* Generate a specific Q-Factor if requested. */
   if (need_q_factor)
     q_factor = gen_prime (req_qbits, is_secret, randomlevel, NULL, NULL);
 
   /* Allocate an array to hold all factors + 2 for later usage.  */
   factors = xtrycalloc (n + 2, sizeof (*factors));
   if (!factors)
     {
       err = gpg_err_code_from_errno (errno);
       goto leave;
     }
 
   /* Allocate an array to track pool usage. */
   pool_in_use = xtrymalloc (n * sizeof *pool_in_use);
   if (!pool_in_use)
     {
       err = gpg_err_code_from_errno (errno);
       goto leave;
     }
   for (i=0; i < n; i++)
     pool_in_use[i] = -1;
 
   /* Make a pool of 3n+5 primes (this is an arbitrary value).  We
      require at least 30 primes for a useful selection process.
 
      Fixme: We need to research the best formula for sizing the pool.
   */
   m = n * 3 + 5;
   if (need_q_factor) /* Need some more in this case. */
     m += 5;
   if (m < 30)
     m = 30;
   pool = xtrycalloc (m , sizeof (*pool));
   if (! pool)
     {
       err = gpg_err_code_from_errno (errno);
       goto leave;
     }
 
   /* Permutate over the pool of primes until we find a prime of the
      requested length.  */
   do
     {
     next_try:
       for (i=0; i < n; i++)
         pool_in_use[i] = -1;
 
       if (!perms)
         {
           /* Allocate new primes.  This is done right at the beginning
              of the loop and if we have later run out of primes. */
           for (i = 0; i < m; i++)
             {
               mpi_free (pool[i]);
               pool[i] = NULL;
             }
 
           /* Init m_out_of_n().  */
           perms = xtrycalloc (1, m);
           if (!perms)
             {
               err = gpg_err_code_from_errno (errno);
               goto leave;
             }
 
           err = gpgrt_lock_lock (&primepool_lock);
           if (err)
             goto leave;
           is_locked = 1;
 
           for (i = 0; i < n; i++)
             {
               perms[i] = 1;
               /* At a maximum we use strong random for the factors.
                  This saves us a lot of entropy. Given that Q and
                  possible Q-factor are also used in the final prime
                  this should be acceptable.  We also don't allocate in
                  secure memory to save on that scarce resource too.  If
                  Q has been allocated in secure memory, the final
                  prime will be saved there anyway.  This is because
                  our MPI routines take care of that.  GnuPG has worked
                  this way ever since.  */
               pool[i] = NULL;
               if (is_locked)
                 {
                   pool[i] = get_pool_prime (fbits, poolrandomlevel);
                   if (!pool[i])
                     {
                       /* The global pool has no more matching primes:
                          release the lock and generate the remaining
                          ones ourselves.  */
                       err = gpgrt_lock_unlock (&primepool_lock);
                       if (err)
                         goto leave;
                       is_locked = 0;
                     }
                 }
               if (!pool[i])
                 pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL);
               pool_in_use[i] = i;
               factors[i] = pool[i];
             }
 
           if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
             goto leave;
           is_locked = 0;
         }
       else
         {
           /* Get next permutation. */
           m_out_of_n ( (char*)perms, n, m);
 
           if ((err = gpgrt_lock_lock (&primepool_lock)))
             goto leave;
           is_locked = 1;
 
           for (i = j = 0; (i < m) && (j < n); i++)
             if (perms[i])
               {
                 /* If the subprime has not yet been generated do it now. */
                 if (!pool[i] && is_locked)
                   {
                     pool[i] = get_pool_prime (fbits, poolrandomlevel);
                     if (!pool[i])
                       {
                         if ((err = gpgrt_lock_unlock (&primepool_lock)))
                           goto leave;
                         is_locked = 0;
                       }
                   }
                 if (!pool[i])
                   pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL);
                 pool_in_use[j] = i;
                 factors[j++] = pool[i];
               }
 
           if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
             goto leave;
           is_locked = 0;
 
           if (i == n)
             {
               /* Ran out of permutations: Allocate new primes.  */
               xfree (perms);
               perms = NULL;
               progress ('!');
               goto next_try;
             }
         }
 
         /* Generate next prime candidate:
            p = 2 * q [ * q_factor] * factor_0 * factor_1 * ... * factor_n + 1.
          */
         mpi_set (prime, q);
         mpi_mul_ui (prime, prime, 2);
         if (need_q_factor)
           mpi_mul (prime, prime, q_factor);
         for(i = 0; i < n; i++)
           mpi_mul (prime, prime, factors[i]);
         mpi_add_ui (prime, prime, 1);
         nprime = mpi_get_nbits (prime);
 
         /* If the candidate came out too short more than 20 times in
            a row, retry with a Q which is one bit longer.  */
         if (nprime < pbits)
           {
             if (++count1 > 20)
               {
                 count1 = 0;
                 qbits++;
                 progress('>');
                 mpi_free (q);
                 q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
                 goto next_try;
               }
           }
         else
           count1 = 0;
 
         /* Likewise use a shorter Q if it came out too long.  */
         if (nprime > pbits)
           {
             if (++count2 > 20)
               {
                 count2 = 0;
                 qbits--;
                 progress('<');
                 mpi_free (q);
                 q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
                 goto next_try;
               }
           }
         else
           count2 = 0;
     }
   while (! ((nprime == pbits) && check_prime (prime, val_2, 5,
                                               cb_func, cb_arg)));
 
   if (DBG_CIPHER)
     {
       progress ('\n');
       log_mpidump ("prime    ", prime);
       log_mpidump ("factor  q", q);
       if (need_q_factor)
         log_mpidump ("factor q0", q_factor);
       for (i = 0; i < n; i++)
         log_mpidump ("factor pi", factors[i]);
       log_debug ("bit sizes: prime=%u, q=%u",
                  mpi_get_nbits (prime), mpi_get_nbits (q));
       if (need_q_factor)
         log_printf (", q0=%u", mpi_get_nbits (q_factor));
       for (i = 0; i < n; i++)
         log_printf (", p%d=%u", i, mpi_get_nbits (factors[i]));
       log_printf ("\n");
     }
 
   if (ret_factors)
     {
       /* Caller wants the factors.  */
       factors_new = xtrycalloc (n + 4, sizeof (*factors_new));
       if (! factors_new)
         {
           err = gpg_err_code_from_errno (errno);
           goto leave;
         }
 
       if (all_factors)
         {
           i = 0;
           factors_new[i++] = mpi_set_ui (NULL, 2);
           factors_new[i++] = mpi_copy (q);
           if (need_q_factor)
             factors_new[i++] = mpi_copy (q_factor);
           for(j=0; j < n; j++)
             factors_new[i++] = mpi_copy (factors[j]);
         }
       else
         {
           i = 0;
           if (need_q_factor)
             {
               factors_new[i++] = mpi_copy (q_factor);
               /* NOTE(review): this copies FACTORS[1..N], i.e. it
                  skips FACTORS[0] and reads slot N which has not been
                  set at this point -- confirm whether FACTORS[i-1]
                  was intended.  */
               for (; i <= n; i++)
                 factors_new[i] = mpi_copy (factors[i]);
             }
           else
             for (; i < n; i++ )
               factors_new[i] = mpi_copy (factors[i]);
         }
     }
 
   if (g && need_q_factor)
     err = GPG_ERR_NOT_IMPLEMENTED;
   else if (g)
     {
       /* Create a generator (start with 3).  */
       gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs (prime));
       gcry_mpi_t b = mpi_alloc (mpi_get_nlimbs (prime));
       gcry_mpi_t pmin1 = mpi_alloc (mpi_get_nlimbs (prime));
 
       /* Use the two spare slots of FACTORS for Q and 2 so that the
          array holds all factors of PRIME-1.  */
       factors[n] = q;
       factors[n + 1] = mpi_alloc_set_ui (2);
       mpi_sub_ui (pmin1, prime, 1);
       mpi_set_ui (g, 2);
       do
         {
           mpi_add_ui (g, g, 1);
           if (DBG_CIPHER)
             log_printmpi ("checking g", g);
           else
             progress('^');
           for (i = 0; i < n + 2; i++)
             {
               mpi_fdiv_q (tmp, pmin1, factors[i]);
               /* No mpi_pow(), but it is okay to use this with mod
                  prime.  */
               mpi_powm (b, g, tmp, prime);
               if (! mpi_cmp_ui (b, 1))
                 break;
             }
           if (DBG_CIPHER)
             progress('\n');
         }
       while (i < n + 2);
 
       mpi_free (factors[n+1]);
       mpi_free (tmp);
       mpi_free (b);
       mpi_free (pmin1);
     }
 
   if (! DBG_CIPHER)
     progress ('\n');
 
 
  leave:
   if (pool)
     {
       is_locked = !gpgrt_lock_lock (&primepool_lock);
       for(i = 0; i < m; i++)
         {
           if (pool[i])
             {
               for (j=0; j < n; j++)
                 if (pool_in_use[j] == i)
                   break;
               if (j == n && is_locked)
                 {
                   /* This pooled subprime has not been used. */
                   save_pool_prime (pool[i], poolrandomlevel);
                 }
               else
                 mpi_free (pool[i]);
             }
         }
       if (is_locked)
         {
           /* Do not let a successful unlock wipe out the error which
              brought us here; the result of the unlock is only taken
              if no error is pending.  */
           gcry_err_code_t unlock_err = gpgrt_lock_unlock (&primepool_lock);
 
           if (!err)
             err = unlock_err;
         }
       is_locked = 0;
       xfree (pool);
     }
   xfree (pool_in_use);
   if (factors)
     xfree (factors);  /* Factors are shallow copies.  */
   if (perms)
     xfree (perms);
 
   mpi_free (val_2);
   mpi_free (q);
   mpi_free (q_factor);
 
   if (! err)
     {
       *prime_generated = prime;
       if (ret_factors)
         *ret_factors = factors_new;
     }
   else
     {
       if (factors_new)
         {
           for (i = 0; factors_new[i]; i++)
             mpi_free (factors_new[i]);
           xfree (factors_new);
         }
       mpi_free (prime);
     }
 
   return err;
 }
 
 
 /* Generate a prime of PBITS bits used for discrete logarithm
    algorithms; i.e. this prime will be public and no strong random is
    required.  A MODE of 1 requests a prime factor of QBITS bits.  G
    is passed on to prime_generate_internal.  On success R_PRIME
    receives a new MPI with the prime.  On error R_PRIME is set to
    NULL and an error code is returned.  If RET_FACTORS is not NULL it
    is set to an allocated array of factors on success or to NULL on
    error.  */
 gcry_err_code_t
 _gcry_generate_elg_prime (int mode, unsigned pbits, unsigned qbits,
                           gcry_mpi_t g,
                           gcry_mpi_t *r_prime, gcry_mpi_t **ret_factors)
 {
   int need_q_factor = (mode == 1);
 
   /* Preset the output values for the error case.  */
   *r_prime = NULL;
   if (ret_factors)
     *ret_factors = NULL;
 
   return prime_generate_internal (need_q_factor, r_prime, pbits, qbits, g,
                                   ret_factors, GCRY_WEAK_RANDOM, 0, 0,
                                   NULL, NULL);
 }
 
 
 /* Generate a probable prime of NBITS bits and return it as a new
    MPI.  NBITS must be at least 16; otherwise log_fatal is called.
    If SECRET is true the working buffers are allocated with the
    secure allocators and the two high order bits of the prime are
    set.  RANDOMLEVEL is passed on to the random number generator.  If
    EXTRA_CHECK is not NULL it is called with EXTRA_CHECK_ARG and each
    prime found; a prime for which it returns true is rejected.  The
    function loops until a prime has been found.  */
 static gcry_mpi_t
 gen_prime (unsigned int nbits, int secret, int randomlevel,
            int (*extra_check)(void *, gcry_mpi_t), void *extra_check_arg)
 {
   gcry_mpi_t prime, ptest, pminus1, val_2, result;
   int i;
   unsigned int x, step;
   unsigned int count1, count2;
   int *mods;
 
   (void)count1; /* The value is not used, actually.  */
 
 /*   if (  DBG_CIPHER ) */
 /*     log_debug ("generate a prime of %u bits ", nbits ); */
 
   if (nbits < 16)
     log_fatal ("can't generate a prime with less than %d bits\n", 16);
 
   /* One remainder per entry of the table of small primes.  */
   mods = (secret? xmalloc_secure (no_of_small_prime_numbers * sizeof *mods)
           /* */ : xmalloc (no_of_small_prime_numbers * sizeof *mods));
   /* Make nbits fit into gcry_mpi_t implementation. */
   val_2  = mpi_alloc_set_ui( 2 );
   prime  = secret? mpi_snew (nbits): mpi_new (nbits);
   result = mpi_alloc_like( prime );
   pminus1= mpi_alloc_like( prime );
   ptest  = mpi_alloc_like( prime );
   count1 = count2 = 0;
   for (;;)
     {  /* try forever */
       int dotcount=0;
 
       /* generate a random number */
       _gcry_mpi_randomize( prime, nbits, randomlevel );
 
       /* Set high order bit to 1, set low order bit to 1.  If we are
          generating a secret prime we are most probably doing that
          for RSA, to make sure that the modulus does have the
          requested key size we set the 2 high order bits. */
       mpi_set_highbit (prime, nbits-1);
       if (secret)
         mpi_set_bit (prime, nbits-2);
       mpi_set_bit(prime, 0);
 
       /* Calculate all remainders. */
       for (i=0; (x = small_prime_numbers[i]); i++ )
         mods[i] = mpi_fdiv_r_ui(NULL, prime, x);
 
       /* Now try some primes starting with prime. */
       for(step=0; step < 20000; step += 2 )
         {
           /* Check against all the small primes we have in mods. */
           count1++;
           for (i=0; (x = small_prime_numbers[i]); i++ )
             {
               /* Keep MODS[i] + STEP, which is the remainder of
                  PRIME + STEP, below X.  */
               while ( mods[i] + step >= x )
                 mods[i] -= x;
               if ( !(mods[i] + step) )
                 break;
             }
           if ( x )
             continue;   /* Found a multiple of an already known prime. */
 
           mpi_add_ui( ptest, prime, step );
 
           /* Do a fast Fermat test now. */
           count2++;
           mpi_sub_ui( pminus1, ptest, 1);
           mpi_powm( result, val_2, pminus1, ptest );
           if ( !mpi_cmp_ui( result, 1 ) )
             {
               /* Not composite, perform stronger tests */
               if (is_prime(ptest, 5, &count2 ))
                 {
                   /* Adding STEP must not have cleared the bit we
                      have set above.  */
                   if (!mpi_test_bit( ptest, nbits-1-secret ))
                     {
                       progress('\n');
                       log_debug ("overflow in prime generation\n");
                       break; /* Stop loop, continue with a new prime. */
                     }
 
                   if (extra_check && extra_check (extra_check_arg, ptest))
                     {
                       /* The extra check told us that this prime is
                          not of the caller's taste. */
                       progress ('/');
                     }
                   else
                     {
                       /* Got it. */
                       mpi_free(val_2);
                       mpi_free(result);
                       mpi_free(pminus1);
                       mpi_free(prime);
                       xfree(mods);
                       return ptest;
                     }
                 }
             }
           if (++dotcount == 10 )
             {
               progress('.');
               dotcount = 0;
             }
         }
       progress(':'); /* restart with a new random value */
     }
 }
 
 /****************
  * Returns: true if PRIME may be a prime.
  * VAL_2 is used as the base of a Fermat test and RM_ROUNDS gives the
  * number of Rabin-Miller tests to run.  If CB_FUNC is not NULL it is
  * called with CB_ARG before and after the Rabin-Miller tests; PRIME
  * is rejected if one of these calls returns false.
  */
 static int
 check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds,
              gcry_prime_check_func_t cb_func, void *cb_arg)
 {
   gcry_mpi_t result, pminus1;
   unsigned int small;
   unsigned int count = 0;
   int idx;
   int fermat_failed;
 
   /* Trial division by the small primes.  A PRIME which is divisible
      by one of them is only accepted if it is that small prime.  */
   for (idx = 0; (small = small_prime_numbers[idx]) != 0; idx++)
     if (mpi_divisible_ui (prime, small))
       return mpi_cmp_ui (prime, small) == 0;
 
   /* A quick Fermat test: VAL_2^(PRIME-1) mod PRIME must be 1.  */
   result = mpi_alloc_like (prime);
   pminus1 = mpi_alloc_like (prime);
   mpi_sub_ui (pminus1, prime, 1);
   mpi_powm (result, val_2, pminus1, prime);
   mpi_free (pminus1);
   fermat_failed = mpi_cmp_ui (result, 1) != 0;
   mpi_free (result);
   if (fermat_failed)
     {
       /* Is composite. */
       progress ('.');
       return 0;
     }
 
   /* Give the callback a chance to reject the candidate, perform the
      stronger tests and then ask the callback a second time.  */
   if ((!cb_func || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_MAYBE_PRIME, prime))
       && is_prime (prime, rm_rounds, &count)
       && (!cb_func
           || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_GOT_PRIME, prime)))
     return 1; /* Probably a prime. */
 
   progress ('.');
   return 0;
 }
 
 
 /*
  * Return true if n is probably a prime.  STEPS gives the number of
  * test rounds to run; at least 5 rounds are always done.  The first
  * round uses the base 2, all others a random base.  The counter at
  * COUNT is incremented for each round started and a '+' progress
  * event is emitted for each round passed.
  */
 static int
 is_prime (gcry_mpi_t n, int steps, unsigned int *count)
 {
   gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs( n ) );
   gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs( n ) );
   gcry_mpi_t nminus1 = mpi_alloc( mpi_get_nlimbs( n ) );
   gcry_mpi_t a2 = mpi_alloc_set_ui( 2 );
   gcry_mpi_t q;
   unsigned i, j, k;
   int rc = 0;
   unsigned nbits = mpi_get_nbits( n );
 
   if (steps < 5) /* Make sure that we do at least 5 rounds. */
     steps = 5;
 
   mpi_sub_ui( nminus1, n, 1 );
 
   /* Find q and k, so that n = 1 + 2^k * q . */
   q = mpi_copy ( nminus1 );
   k = mpi_trailing_zeros ( q );
   mpi_tdiv_q_2exp (q, q, k);
 
   for (i=0 ; i < steps; i++ )
     {
       ++*count;
       if( !i )
         {
           mpi_set_ui( x, 2 );
         }
       else
         {
           /* We need to loop to avoid an X with value 0 or 1.  */
           do
             {
               _gcry_mpi_randomize (x, nbits, GCRY_WEAK_RANDOM);
 
               /* Make sure that the number is smaller than the prime
                * and keep the randomness of the high bit. */
               if (mpi_test_bit (x, nbits-2))
                 {
                   mpi_set_highbit (x, nbits-2); /* Clear all higher bits. */
                 }
               else
                 {
                   mpi_set_highbit (x, nbits-2);
                   mpi_clear_bit (x, nbits-2);
                 }
             }
           while (mpi_cmp_ui (x, 1) <= 0);
           gcry_assert (mpi_cmp (x, nminus1) < 0);
         }
       /* The round is passed if x^q mod n is 1 or n-1 or if one of
          the up to k-1 squarings of it yields n-1.  */
       mpi_powm ( y, x, q, n);
       if ( mpi_cmp_ui(y, 1) && mpi_cmp( y, nminus1 ) )
         {
           for ( j=1; j < k && mpi_cmp( y, nminus1 ); j++ )
             {
               mpi_powm(y, y, a2, n);
               if( !mpi_cmp_ui( y, 1 ) )
                 goto leave; /* Not a prime. */
             }
           if (mpi_cmp( y, nminus1 ) )
             goto leave; /* Not a prime. */
         }
       progress('+');
     }
   rc = 1; /* May be a prime. */
 
  leave:
   mpi_free( x );
   mpi_free( y );
   mpi_free( nminus1 );
   mpi_free( q );
   mpi_free( a2 );
 
   return rc;
 }
 
 
 /* Given ARRAY of size N with M elements set to true produce a
    modified array with the next permutation of M elements.  Note that
    ARRAY is used in a one-bit-per-byte approach.  To detect the last
    permutation it is useful to initialize the array with the first M
    elements set to true and use this test:
        m_out_of_n (array, m, n);
        for (i = j = 0; i < n && j < m; i++)
          if (array[i])
            j++;
        if (j == m)
          goto ready;
 
    This code is based on the algorithm 452 from the "Collected
    Algorithms From ACM, Volume II" by C. N. Liu and D. T. Tang.
 */
 static void
 m_out_of_n ( char *array, int m, int n )
 {
   int i=0, i1=0, j=0, jp=0,  j1=0, k1=0, k2=0;
 
   if( !m || m >= n )
     return;
 
   /* Need to handle this simple case separately. */
   if( m == 1 )
     {
       for (i=0; i < n; i++ )
         {
           if ( array[i] )
             {
               array[i++] = 0;
               if( i >= n )
                 i = 0;
               array[i] = 1;
               return;
             }
         }
       BUG();
     }
 
 
   for (j=1; j < n; j++ )
     {
       if ( array[n-1] == array[n-j-1])
         continue;
       j1 = j;
       break;
     }
 
   if ( (m & 1) )
     {
       /* M is odd. */
       if( array[n-1] )
         {
           if( j1 & 1 )
             {
               k1 = n - j1;
               k2 = k1+2;
               if( k2 > n )
                 k2 = n;
               goto leave;
             }
           goto scan;
         }
       k2 = n - j1 - 1;
       if( k2 == 0 )
         {
           k1 = i;
           k2 = n - j1;
         }
       else if( array[k2] && array[k2-1] )
         k1 = n;
       else
         k1 = k2 + 1;
     }
   else
     {
       /* M is even. */
       if( !array[n-1] )
         {
           k1 = n - j1;
           k2 = k1 + 1;
           goto leave;
         }
 
       if( !(j1 & 1) )
         {
           k1 = n - j1;
           k2 = k1+2;
           if( k2 > n )
             k2 = n;
           goto leave;
         }
     scan:
       jp = n - j1 - 1;
       for (i=1; i <= jp; i++ )
         {
           i1 = jp + 2 - i;
           if( array[i1-1]  )
             {
               if( array[i1-2] )
                 {
                   k1 = i1 - 1;
                   k2 = n - j1;
 		}
               else
                 {
                   k1 = i1 - 1;
                   k2 = n + 1 - j1;
                 }
               goto leave;
             }
         }
       k1 = 1;
       k2 = n + 1 - m;
     }
  leave:
   /* Now complement the two selected bits. */
   array[k1-1] = !array[k1-1];
   array[k2-1] = !array[k2-1];
 }
 
 
 /* Create a new prime of PRIME_BITS bits and return it through PRIME.
    A non-zero FACTOR_BITS requests that one of the prime factors of
    (prime - 1) / 2 is FACTOR_BITS bits long.  If FACTORS is not NULL
    it receives a newly allocated, NULL-terminated array with the
    prime factors.  CB_FUNC, if given, is handed to the generator and
    is also called with CB_ARG for a final check of the finished
    prime; if that final check returns false everything is released
    and GPG_ERR_GENERAL is returned.  RANDOM_LEVEL and FLAGS are
    passed on to the generator.  PRIME is set to NULL on error;
    FACTORS is only written on success.  */
 gcry_err_code_t
 _gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits,
                       unsigned int factor_bits, gcry_mpi_t **factors,
                       gcry_prime_check_func_t cb_func, void *cb_arg,
                       gcry_random_level_t random_level,
                       unsigned int flags)
 {
   gcry_err_code_t err;
   gcry_mpi_t new_prime = NULL;
   gcry_mpi_t *new_factors = NULL;
   int special_factor;
 
   if (!prime)
     return GPG_ERR_INV_ARG;
   *prime = NULL;
 
   special_factor = !!(flags & GCRY_PRIME_FLAG_SPECIAL_FACTOR);
 
   /* Run the actual generator.  */
   err = prime_generate_internal (special_factor, &new_prime, prime_bits,
                                  factor_bits, NULL,
                                  factors? &new_factors : NULL,
                                  random_level, flags, 1,
                                  cb_func, cb_arg);
   if (err)
     return err;
 
   /* Give the caller's callback a last chance to reject the prime.  */
   if (cb_func && !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, new_prime))
     {
       mpi_free (new_prime);
       if (factors)
         {
           gcry_mpi_t *fp;
 
           for (fp = new_factors; *fp; fp++)
             mpi_free (*fp);
           xfree (new_factors);
         }
       return GPG_ERR_GENERAL;
     }
 
   /* Success: hand the results over to the caller.  */
   if (factors)
     *factors = new_factors;
   *prime = new_prime;
 
   return 0;
 }
 
 /* Test whether X is a prime.  FLAGS is not used.  Returns 0 if X
    passes the test and GPG_ERR_NO_PRIME if not.  */
 gcry_err_code_t
 _gcry_prime_check (gcry_mpi_t x, unsigned int flags)
 {
   int cmp2;
 
   (void)flags;
 
   cmp2 = mpi_cmp_ui (x, 2);
   if (!cmp2)
     return 0;                 /* 2 is a prime.  */
   if (cmp2 == -1)
     return GPG_ERR_NO_PRIME;  /* Only numbers > 1 are primes.  */
 
   /* X is not guaranteed to be a random number; thus we run 64
      rounds.  */
   return (check_prime (x, mpi_const (MPI_C_TWO), 64, NULL, NULL)
           ? 0 : GPG_ERR_NO_PRIME);
 }
 
 
 /* Test whether X is a prime using the number of rounds given in
    FIPS 186-4 table C.2.  BITS selects the number of rounds.  Returns
    GPG_ERR_NO_ERROR if X passes the test and GPG_ERR_NO_PRIME if
    not.  */
 gcry_err_code_t
 _gcry_fips186_4_prime_check (gcry_mpi_t x, unsigned int bits)
 {
   int cmp2 = mpi_cmp_ui (x, 2);
   int rounds;
 
   if (!cmp2)
     return GPG_ERR_NO_ERROR;  /* 2 is a prime.  */
   if (cmp2 == -1)
     return GPG_ERR_NO_PRIME;  /* Only numbers > 1 are primes.  */
 
   /* Table C.2: 4 rounds for more than 1024 bits, 5 rounds otherwise.  */
   rounds = bits > 1024 ? 4 : 5;
   if (check_prime (x, mpi_const (MPI_C_TWO), rounds, NULL, NULL))
     return GPG_ERR_NO_ERROR;
 
   return GPG_ERR_NO_PRIME;
 }
 
 
 /* Search a generator for PRIME.  FACTORS is the NULL terminated
    array with the factorization of (prime-1); it needs to have at
    least two entries.  The search starts at START_G or, if that is
    NULL, at 3 and proceeds in steps of one.  On success 0 is returned
    and the generator is stored as a newly allocated MPI at R_G.
    GPG_ERR_INV_ARG is returned for missing or too few arguments.  */
 gcry_err_code_t
 _gcry_prime_group_generator (gcry_mpi_t *r_g,
                              gcry_mpi_t prime, gcry_mpi_t *factors,
                              gcry_mpi_t start_g)
 {
   gcry_mpi_t quot, res, p_minus_1, cand;
   int nfactors, idx;
 
   if (!r_g)
     return GPG_ERR_INV_ARG;
   *r_g = NULL;
   if (!factors || !prime)
     return GPG_ERR_INV_ARG;
 
   /* Count the factors.  */
   nfactors = 0;
   while (factors[nfactors])
     nfactors++;
   if (nfactors < 2)
     return GPG_ERR_INV_ARG;
 
   quot      = mpi_new (0);
   res       = mpi_new (0);
   p_minus_1 = mpi_new (0);
   cand      = start_g? mpi_copy (start_g) : mpi_set_ui (NULL, 3);
 
   /* Note that we do not check that PRIME is actually the product of
      all FACTORS plus 1.  */
 
   mpi_sub_ui (p_minus_1, prime, 1);
 
   /* Try one candidate after the other.  A candidate is rejected as
      soon as candidate^((prime-1)/factor) mod prime is 1 for one of
      the factors.  */
   for (;; mpi_add_ui (cand, cand, 1))
     {
       if (DBG_CIPHER)
         log_printmpi ("checking g", cand);
       else
         progress('^');
 
       idx = 0;
       while (idx < nfactors)
         {
           mpi_fdiv_q (quot, p_minus_1, factors[idx]);
           mpi_powm (res, cand, quot, prime);
           if (! mpi_cmp_ui (res, 1))
             break;  /* Not a generator.  */
           idx++;
         }
       if (DBG_CIPHER)
         progress('\n');
 
       if (!(idx < nfactors))
         break;  /* All factors passed: CAND is the generator.  */
     }
 
   _gcry_mpi_release (quot);
   _gcry_mpi_release (res);
   _gcry_mpi_release (p_minus_1);
   *r_g = cand;
 
   return 0;
 }
 
 /* Release all MPIs of the NULL terminated array FACTORS and then the
    array itself.  Passing NULL is allowed and does nothing.  */
 void
 _gcry_prime_release_factors (gcry_mpi_t *factors)
 {
   gcry_mpi_t *fp;
 
   if (!factors)
     return;
 
   for (fp = factors; *fp; fp++)
     mpi_free (*fp);
   xfree (factors);
 }
 
 
 
 /* Helper for _gcry_derive_x931_prime: Return a newly allocated MPI
    with the first odd number not smaller than PFIRST which passes
    the prime check.  PFIRST itself is not modified.  */
 static gcry_mpi_t
 find_x931_prime (const gcry_mpi_t pfirst)
 {
   gcry_mpi_t two = mpi_alloc_set_ui (2);
   gcry_mpi_t cand = mpi_copy (pfirst);
 
   /* Make the start value odd.  */
   mpi_set_bit (cand, 0);
 
   /* X9.31 prefers a few Rabin-Miller rounds followed by one Lucas
      test.  There is no Lucas test implementation here, thus 64
      Rabin-Miller rounds are used instead, which is better and thus
      sufficient.  */
   for (;;)
     {
       if (check_prime (cand, two, 64, NULL, NULL))
         break;
       mpi_add_ui (cand, cand, 2);  /* Try the next odd number.  */
     }
 
   mpi_free (two);
 
   return cand;
 }
 
 
 /* Generate a prime using the algorithm from X9.31 appendix B.4.
 
    This function requires that the provided public exponent E is odd.
    XP, XP1 and XP2 are the seed values.  All values are mandatory.
 
    On success the prime is returned as a newly allocated MPI.  If
    R_P1 or R_P2 are given the internal values P1 and P2 are saved at
    these addresses; otherwise they are released here.  On error
    (a missing seed value or a missing or even E) NULL is returned
    and R_P1 and R_P2 are not touched.  */
 gcry_mpi_t
 _gcry_derive_x931_prime (const gcry_mpi_t xp,
                          const gcry_mpi_t xp1, const gcry_mpi_t xp2,
                          const gcry_mpi_t e,
                          gcry_mpi_t *r_p1, gcry_mpi_t *r_p2)
 {
   gcry_mpi_t p1, p2, p1p2, yp0;
 
   if (!xp || !xp1 || !xp2)
     return NULL;
   if (!e || !mpi_test_bit (e, 0))
     return NULL;  /* We support only odd values for E.  */
 
   /* P1 and P2 are the auxiliary primes derived from the seeds XP1
      and XP2; P1P2 is their product.  */
   p1 = find_x931_prime (xp1);
   p2 = find_x931_prime (xp2);
   p1p2 = mpi_alloc_like (xp);
   mpi_mul (p1p2, p1, p2);
 
   {
     gcry_mpi_t r1, tmp;
 
     /* r1 = (p2^{-1} mod p1)p2 - (p1^{-1} mod p2) */
     tmp = mpi_alloc_like (p1);
     mpi_invm (tmp, p2, p1);
     mpi_mul (tmp, tmp, p2);
     r1 = tmp;  /* R1 takes over the first TMP.  */
 
     /* Note that the second term is multiplied by P1 before it is
        subtracted.  */
     tmp = mpi_alloc_like (p2);
     mpi_invm (tmp, p1, p2);
     mpi_mul (tmp, tmp, p1);
     mpi_sub (r1, r1, tmp);
 
     /* Fixup a negative value.  */
     if (mpi_has_sign (r1))
       mpi_add (r1, r1, p1p2);
 
     /* yp0 = xp + (r1 - xp mod p1*p2)  */
     yp0 = tmp; tmp = NULL;  /* Reuse the second TMP as YP0.  */
     mpi_subm (yp0, r1, xp, p1p2);
     mpi_add (yp0, yp0, xp);
     mpi_free (r1);
 
     /* Fixup a negative value.  */
     if (mpi_cmp (yp0, xp) < 0 )
       mpi_add (yp0, yp0, p1p2);
   }
 
   /* yp0 is now the first integer greater than xp with p1 being a
      large prime factor of yp0-1 and p2 a large prime factor of yp0+1.  */
 
   /* Note that the first example from X9.31 (D.1.1) which uses
        (Xq1 #1A5CF72EE770DE50CB09ACCEA9#)
        (Xq2 #134E4CAA16D2350A21D775C404#)
        (Xq  #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
              7C9953388F97DDDC3E1CA19C35CA659EDC2FC325
              6D29C2627479C086A699A49C4C9CEE7EF7BD1B34
              321DE34A#))))
      returns an yp0 of
             #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
              7C9953388F97DDDC3E1CA19C35CA659EDC2FC4E3
              BF20CB896EE37E098A906313271422162CB6C642
              75C1201F#
      and not
             #CC1092495D867E64065DEE3E7955F2EBC7D47A2D
              7C9953388F97DDDC3E1CA19C35CA659EDC2FC2E6
              C88FE299D52D78BE405A97E01FD71DD7819ECB91
              FA85A076#
      as stated in the standard.  This seems to be a bug in X9.31.
   */
 
   /* Step through the candidates yp0, yp0 + p1p2, yp0 + 2*p1p2, ...
      until one is found for which gcd (e, candidate-1) is 1 and
      which passes the prime check.  */
   {
     gcry_mpi_t val_2 = mpi_alloc_set_ui (2);
     gcry_mpi_t gcdtmp = mpi_alloc_like (yp0);
     int gcdres;
 
     mpi_sub_ui (p1p2, p1p2, 1); /* Adjust for loop body.  */
     mpi_sub_ui (yp0, yp0, 1);   /* Ditto.  */
     for (;;)
       {
         /* At this point YP0 holds the candidate minus 1.  */
         gcdres = mpi_gcd (gcdtmp, e, yp0);
         mpi_add_ui (yp0, yp0, 1);
         if (!gcdres)
           progress ('/');  /* gcd (e, yp0-1) != 1  */
         else if (check_prime (yp0, val_2, 64, NULL, NULL))
           break; /* Found.  */
         /* We add p1p2-1 because yp0 is incremented after the gcd test.  */
         mpi_add (yp0, yp0, p1p2);
       }
     mpi_free (gcdtmp);
     mpi_free (val_2);
   }
 
   mpi_free (p1p2);
 
   progress('\n');
   /* Hand P1 and P2 over to the caller or release them.  */
   if (r_p1)
     *r_p1 = p1;
   else
     mpi_free (p1);
   if (r_p2)
     *r_p2 = p2;
   else
     mpi_free (p2);
   return yp0;
 }
 
 
 
 /* Generate the two primes used for DSA using the algorithm specified
    in FIPS 186-2.  PBITS is the desired length of the prime P and
    QBITS the length of the prime Q; only 1024/160 is supported and
    GPG_ERR_INV_KEYLEN is returned for other values.  If SEED is not
    supplied and SEEDLEN is 0 the function generates an appropriate
    SEED.  A supplied SEED must be at least QBITS/8 bytes long or
    GPG_ERR_INV_ARG is returned.  If a SEED does not lead to a pair of
    primes a new random seed is used instead.
 
    On success 0 is returned, the generated primes are stored at R_Q
    and R_P, the counter value is stored at R_COUNTER and the seed
    actually used for generation is stored as a malloced buffer at
    R_SEED with its length at R_SEEDLEN (only if both are given; the
    caller needs to xfree that buffer).  All R_ arguments are
    optional.  */
 gpg_err_code_t
 _gcry_generate_fips186_2_prime (unsigned int pbits, unsigned int qbits,
                                 const void *seed, size_t seedlen,
                                 gcry_mpi_t *r_q, gcry_mpi_t *r_p,
                                 int *r_counter,
                                 void **r_seed, size_t *r_seedlen)
 {
   gpg_err_code_t ec;
   unsigned char seed_help_buffer[160/8];  /* Used to hold a generated SEED. */
   unsigned char *seed_plus;     /* Malloced buffer to hold SEED+x.  */
   unsigned char digest[160/8];  /* Helper buffer for SHA-1 digest.  */
   gcry_mpi_t val_2 = NULL;      /* Helper for the prime test.  */
   gcry_mpi_t tmpval = NULL;     /* Helper variable.  */
   int i;
 
   unsigned char value_u[160/8];
   int value_n, value_b, value_k;
   int counter;
   gcry_mpi_t value_w = NULL;
   gcry_mpi_t value_x = NULL;
   gcry_mpi_t prime_q = NULL;
   gcry_mpi_t prime_p = NULL;
 
   /* FIPS 186-2 allows only for 1024/160 bit.  */
   if (pbits != 1024 || qbits != 160)
     return GPG_ERR_INV_KEYLEN;
 
   if (!seed && !seedlen)
     ; /* No seed value given:  We are asked to generate it.  */
   else if (!seed || seedlen < qbits/8)
     return GPG_ERR_INV_ARG;
 
   /* Allocate a buffer to later compute SEED+some_increment.  It needs
      to be large enough for a supplied seed as well as for a seed
      generated into SEED_HELP_BUFFER.  */
   seed_plus = xtrymalloc (seedlen < 20? 20:seedlen);
   if (!seed_plus)
     {
       ec = gpg_err_code_from_syserror ();
       goto leave;
     }
 
   val_2   = mpi_alloc_set_ui (2);
   value_n = (pbits - 1) / qbits;
   value_b = (pbits - 1) - value_n * qbits;
   value_w = mpi_new (pbits);
   value_x = mpi_new (pbits);
 
  restart:
   /* Generate Q.  */
   for (;;)
     {
       /* Step 1: Generate a (new) seed unless one has been supplied.  */
       if (!seed)
         {
           seedlen = sizeof seed_help_buffer;
           _gcry_create_nonce (seed_help_buffer, seedlen);
           seed = seed_help_buffer;
         }
 
       /* Step 2: U = sha1(seed) ^ sha1((seed+1) mod 2^{qbits})  */
       memcpy (seed_plus, seed, seedlen);
       for (i=seedlen-1; i >= 0; i--)
         {
           /* Big endian increment; stop if there is no carry.  */
           seed_plus[i]++;
           if (seed_plus[i])
             break;
         }
       _gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen);
       _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
       for (i=0; i < sizeof value_u; i++)
         value_u[i] ^= digest[i];
 
       /* Step 3:  Form q from U  */
       _gcry_mpi_release (prime_q); prime_q = NULL;
       ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
                            value_u, sizeof value_u, NULL);
       if (ec)
         goto leave;
       mpi_set_highbit (prime_q, qbits-1 );
       mpi_set_bit (prime_q, 0);
 
       /* Step 4:  Test whether Q is prime using 64 round of Rabin-Miller.  */
       if (check_prime (prime_q, val_2, 64, NULL, NULL))
         break; /* Yes, Q is prime.  */
 
       /* Step 5.  */
       seed = NULL;  /* Force a new seed at Step 1.  */
     }
 
   /* Step 6.  Note that we do no use an explicit offset but increment
      SEED_PLUS accordingly.  SEED_PLUS is currently SEED+1.  */
   counter = 0;
 
   /* Generate P. */
   prime_p = mpi_new (pbits);
   for (;;)
     {
       /* Step 7: For k = 0,...n let
                    V_k = sha1(seed+offset+k) mod 2^{qbits}
          Step 8: W = V_0 + V_1*2^160 +
                          ...
                          + V_{n-1}*2^{(n-1)*160}
                          + (V_{n} mod 2^b)*2^{n*160}
        */
       mpi_set_ui (value_w, 0);
       for (value_k=0; value_k <= value_n; value_k++)
         {
           /* There is no need to have an explicit offset variable:  In
              the first round we shall have an offset of 2, this is
              achieved by using SEED_PLUS which is already at SEED+1,
              thus we just need to increment it once again.  The
              requirement for the next round is to update offset by N,
              which we implicitly did at the end of this loop, and then
              to add one; this one is the same as in the first round.  */
           for (i=seedlen-1; i >= 0; i--)
             {
               seed_plus[i]++;
               if (seed_plus[i])
                 break;
             }
           _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
 
           _gcry_mpi_release (tmpval); tmpval = NULL;
           ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
                                digest, sizeof digest, NULL);
           if (ec)
             goto leave;
           if (value_k == value_n)
             mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
           mpi_lshift (tmpval, tmpval, value_k*qbits);
           mpi_add (value_w, value_w, tmpval);
         }
 
       /* Step 8 continued: X = W + 2^{L-1}  */
       mpi_set_ui (value_x, 0);
       mpi_set_highbit (value_x, pbits-1);
       mpi_add (value_x, value_x, value_w);
 
       /* Step 9:  c = X mod 2q,  p = X - (c - 1)  */
       mpi_mul_2exp (tmpval, prime_q, 1);
       mpi_mod (tmpval, value_x, tmpval);
       mpi_sub_ui (tmpval, tmpval, 1);
       mpi_sub (prime_p, value_x, tmpval);
 
       /* Step 10: If  p < 2^{L-1}  skip the primality test.  A number
                   is not smaller than 2^{L-1} iff it has at least L
                   bits.  */
       /* Step 11 and 12: Primality test.  */
       if (mpi_get_nbits (prime_p) >= pbits
           && check_prime (prime_p, val_2, 64, NULL, NULL) )
         break; /* Yes, P is prime, continue with Step 15.  */
 
       /* Step 13: counter = counter + 1, offset = offset + n + 1. */
       counter++;
 
       /* Step 14: If counter >= 2^12  goto Step 1.  */
       if (counter >= 4096)
         {
           /* The whole computation is fully determined by the seed.
              Thus we need to force a new seed at Step 1; running
              again with the same seed would end up here again and
              again.  We also release P because it is allocated anew
              after Q has been generated.  */
           _gcry_mpi_release (prime_p); prime_p = NULL;
           seed = NULL;
           goto restart;
         }
     }
 
   /* Step 15:  Save p, q, counter and seed.  */
 /*   log_debug ("fips186-2 pbits p=%u q=%u counter=%d\n", */
 /*              mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
 /*   log_printhex("fips186-2 seed:", seed, seedlen); */
 /*   log_mpidump ("fips186-2 prime p", prime_p); */
 /*   log_mpidump ("fips186-2 prime q", prime_q); */
   if (r_q)
     {
       *r_q = prime_q;
       prime_q = NULL;
     }
   if (r_p)
     {
       *r_p = prime_p;
       prime_p = NULL;
     }
   if (r_counter)
     *r_counter = counter;
   if (r_seed && r_seedlen)
     {
       /* Reuse the SEED_PLUS buffer to return the seed; setting it to
          NULL keeps it from being freed below.  */
       memcpy (seed_plus, seed, seedlen);
       *r_seed = seed_plus;
       seed_plus = NULL;
       *r_seedlen = seedlen;
     }
 
 
  leave:
   /* Everything not handed over to the caller is released here.  */
   _gcry_mpi_release (tmpval);
   _gcry_mpi_release (value_x);
   _gcry_mpi_release (value_w);
   _gcry_mpi_release (prime_p);
   _gcry_mpi_release (prime_q);
   xfree (seed_plus);
   _gcry_mpi_release (val_2);
   return ec;
 }
 
 
 
 /* WARNING: The code below has not yet been tested!
  *
  * Generate the two primes used for DSA using the algorithm specified
  * in FIPS 186-3, A.1.1.2.  PBITS is the desired length of the prime P
  * and QBITS the length of the prime Q; supported combinations are
  * 2048/224, 2048/256 and 3072/256, for all others GPG_ERR_INV_KEYLEN
  * is returned.  If SEED is not supplied and SEEDLEN is 0 the function
  * generates an appropriate SEED.  A supplied SEED must be at least
  * QBITS/8 bytes long or GPG_ERR_INV_ARG is returned.  If a SEED does
  * not lead to a pair of primes a new random seed is used instead.
  *
  * On success 0 is returned, the generated primes are stored at R_Q
  * and R_P, the counter value is stored at R_COUNTER and the seed
  * actually used for generation is stored as a malloced buffer at
  * R_SEED with its length at R_SEEDLEN (only if both are given; the
  * caller needs to xfree that buffer).  The hash algorithm used is
  * stored at R_HASHALGO.  All R_ arguments are optional.
  *
  * Note that this function is very similar to the fips186_2 code.  Due
  * to the minor differences, other buffer sizes and for documentation,
  * we use a separate function.
  */
 gpg_err_code_t
 _gcry_generate_fips186_3_prime (unsigned int pbits, unsigned int qbits,
                                 const void *seed, size_t seedlen,
                                 gcry_mpi_t *r_q, gcry_mpi_t *r_p,
                                 int *r_counter,
                                 void **r_seed, size_t *r_seedlen,
                                 int *r_hashalgo)
 {
   gpg_err_code_t ec;
   unsigned char seed_help_buffer[256/8];  /* Used to hold a generated SEED. */
   unsigned char *seed_plus;     /* Malloced buffer to hold SEED+x.  */
   unsigned char digest[256/8];  /* Helper buffer for SHA-2 digest.  */
   gcry_mpi_t val_2 = NULL;      /* Helper for the prime test.  */
   gcry_mpi_t tmpval = NULL;     /* Helper variable.  */
   int hashalgo;                 /* The id of the Approved Hash Function.  */
   int i;
 
   unsigned char value_u[256/8];
   int value_n, value_b, value_j;
   int counter;
   gcry_mpi_t value_w = NULL;
   gcry_mpi_t value_x = NULL;
   gcry_mpi_t prime_q = NULL;
   gcry_mpi_t prime_p = NULL;
 
   gcry_assert (sizeof seed_help_buffer == sizeof digest
                && sizeof seed_help_buffer == sizeof value_u);
 
   /* Step 1:  Check the requested prime lengths.  */
   /* Note that due to the size of our buffers QBITS is limited to 256.  */
   if (pbits == 2048 && qbits == 224)
     hashalgo = GCRY_MD_SHA224;
   else if (pbits == 2048 && qbits == 256)
     hashalgo = GCRY_MD_SHA256;
   else if (pbits == 3072 && qbits == 256)
     hashalgo = GCRY_MD_SHA256;
   else
     return GPG_ERR_INV_KEYLEN;
 
   /* Also check that the hash algorithm is available.  */
   ec = _gcry_md_test_algo (hashalgo);
   if (ec)
     return ec;
   gcry_assert (qbits/8 <= sizeof digest);
   gcry_assert (_gcry_md_get_algo_dlen (hashalgo) == qbits/8);
 
 
   /* Step 2:  Check seedlen.  */
   if (!seed && !seedlen)
     ; /* No seed value given:  We are asked to generate it.  */
   else if (!seed || seedlen < qbits/8)
     return GPG_ERR_INV_ARG;
 
   /* Allocate a buffer to later compute SEED+some_increment and a few
      helper variables.  The buffer needs to be large enough for a
      supplied seed as well as for a generated one.  */
   seed_plus = xtrymalloc (seedlen < sizeof seed_help_buffer?
                           sizeof seed_help_buffer : seedlen);
   if (!seed_plus)
     {
       ec = gpg_err_code_from_syserror ();
       goto leave;
     }
   val_2   = mpi_alloc_set_ui (2);
   value_w = mpi_new (pbits);
   value_x = mpi_new (pbits);
 
   /* Step 3: n = \lceil L / outlen \rceil - 1  */
   value_n = (pbits + qbits - 1) / qbits - 1;
   /* Step 4: b = L - 1 - (n * outlen)  */
   value_b = pbits - 1 - (value_n * qbits);
 
  restart:
   /* Generate Q.  */
   for (;;)
     {
       /* Step 5:  Generate a (new) seed unless one has been supplied.  */
       if (!seed)
         {
           seedlen = qbits/8;
           gcry_assert (seedlen <= sizeof seed_help_buffer);
           _gcry_create_nonce (seed_help_buffer, seedlen);
           seed = seed_help_buffer;
         }
 
       /* Step 6:  U = hash(seed)  */
       _gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
 
       /* Step 7:  q = 2^{N-1} + U + 1 - (U mod 2)  */
       if ( !(value_u[qbits/8-1] & 0x01) )
         {
           /* U is even: Add 1 (big endian increment).  */
           for (i=qbits/8-1; i >= 0; i--)
             {
               value_u[i]++;
               if (value_u[i])
                 break;
             }
         }
       _gcry_mpi_release (prime_q); prime_q = NULL;
       ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
                            value_u, qbits/8, NULL);
       if (ec)
         goto leave;
       mpi_set_highbit (prime_q, qbits-1 );
 
       /* Step 8:  Test whether Q is prime using 64 round of Rabin-Miller.
                   According to table C.1 this is sufficient for all
                   supported prime sizes (i.e. up 3072/256).  */
       if (check_prime (prime_q, val_2, 64, NULL, NULL))
         break; /* Yes, Q is prime.  */
 
       /* Step 9.  */
       seed = NULL;  /* Force a new seed at Step 5.  */
     }
 
   /* Step 11.  Note that we do no use an explicit offset but increment
      SEED_PLUS accordingly.  */
   memcpy (seed_plus, seed, seedlen);
   counter = 0;
 
   /* Generate P. */
   prime_p = mpi_new (pbits);
   for (;;)
     {
       /* Step 11.1: For j = 0,...n let
                       V_j = hash(seed+offset+j)
          Step 11.2: W = V_0 + V_1*2^outlen +
                             ...
                             + V_{n-1}*2^{(n-1)*outlen}
                             + (V_{n} mod 2^b)*2^{n*outlen}
        */
       mpi_set_ui (value_w, 0);
       for (value_j=0; value_j <= value_n; value_j++)
         {
           /* There is no need to have an explicit offset variable: In
              the first round we shall have an offset of 1 and a j of
              0.  This is achieved by incrementing SEED_PLUS here.  For
              the next round offset is implicitly updated by using
              SEED_PLUS again.  */
           for (i=seedlen-1; i >= 0; i--)
             {
               seed_plus[i]++;
               if (seed_plus[i])
                 break;
             }
           _gcry_md_hash_buffer (hashalgo, digest, seed_plus, seedlen);
 
           _gcry_mpi_release (tmpval); tmpval = NULL;
           ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
                                digest, qbits/8, NULL);
           if (ec)
             goto leave;
           if (value_j == value_n)
             mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
           mpi_lshift (tmpval, tmpval, value_j*qbits);
           mpi_add (value_w, value_w, tmpval);
         }
 
       /* Step 11.3: X = W + 2^{L-1}  */
       mpi_set_ui (value_x, 0);
       mpi_set_highbit (value_x, pbits-1);
       mpi_add (value_x, value_x, value_w);
 
       /* Step 11.4:  c = X mod 2q  */
       mpi_mul_2exp (tmpval, prime_q, 1);
       mpi_mod (tmpval, value_x, tmpval);
 
       /* Step 11.5:  p = X - (c - 1)  */
       mpi_sub_ui (tmpval, tmpval, 1);
       mpi_sub (prime_p, value_x, tmpval);
 
       /* Step 11.6: If  p < 2^{L-1}  skip the primality test.  A
                     number is not smaller than 2^{L-1} iff it has at
                     least L bits.  */
       /* Step 11.7 and 11.8: Primality test.  */
       if (mpi_get_nbits (prime_p) >= pbits
           && check_prime (prime_p, val_2, 64, NULL, NULL) )
         break; /* Yes, P is prime, continue with Step 12.  */
 
       /* Step 11.9: counter = counter + 1, offset = offset + n + 1.
                     If counter >= 4L  goto Step 5.  */
       counter++;
       if (counter >= 4*pbits)
         {
           /* The whole computation is fully determined by the seed.
              Thus we need to force a new seed at Step 5; running
              again with the same seed would end up here again and
              again.  We also release P because it is allocated anew
              after Q has been generated.  */
           _gcry_mpi_release (prime_p); prime_p = NULL;
           seed = NULL;
           goto restart;
         }
     }
 
   /* Step 12:  Save p, q, counter and seed.  */
   /* log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n", */
   /*            mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
   /* log_printhex ("fips186-3 seed", seed, seedlen); */
   /* log_printmpi ("fips186-3    p", prime_p); */
   /* log_printmpi ("fips186-3    q", prime_q); */
 
   if (r_q)
     {
       *r_q = prime_q;
       prime_q = NULL;
     }
   if (r_p)
     {
       *r_p = prime_p;
       prime_p = NULL;
     }
   if (r_counter)
     *r_counter = counter;
   if (r_seed && r_seedlen)
     {
       /* Reuse the SEED_PLUS buffer to return the seed; setting it to
          NULL keeps it from being freed below.  */
       memcpy (seed_plus, seed, seedlen);
       *r_seed = seed_plus;
       seed_plus = NULL;
       *r_seedlen = seedlen;
     }
   if (r_hashalgo)
     *r_hashalgo = hashalgo;
 
  leave:
   /* Everything not handed over to the caller is released here.  */
   _gcry_mpi_release (tmpval);
   _gcry_mpi_release (value_x);
   _gcry_mpi_release (value_w);
   _gcry_mpi_release (prime_p);
   _gcry_mpi_release (prime_q);
   xfree (seed_plus);
   _gcry_mpi_release (val_2);
   return ec;
 }
diff --git a/cipher/rfc2268.c b/cipher/rfc2268.c
index c270ce9b..1d516917 100644
--- a/cipher/rfc2268.c
+++ b/cipher/rfc2268.c
@@ -1,381 +1,381 @@
 /* rfc2268.c  - The cipher described in rfc2268; aka Ron's Cipher 2.
  * Copyright (C) 2003 Nikos Mavroyanopoulos
  * Copyright (C) 2004 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 /* This implementation was written by Nikos Mavroyanopoulos for GNUTLS
  * as a Libgcrypt module (gnutls/lib/x509/rc2.c) and later adapted for
  * direct use by Libgcrypt by Werner Koch.  This implementation is
  * only useful for pkcs#12 decryption.
  *
  * The implementation here is based on Peter Gutmann's RRC.2 paper.
  */
 
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "g10lib.h"
 #include "types.h"
 #include "cipher.h"
 #include "cipher-internal.h"
 
 #define RFC2268_BLOCKSIZE 8
 
 /* Cipher context holding the expanded key.  setkey_core() first fills
  * S through an unsigned char pointer as a 128 byte buffer and finally
  * repacks it into 64 16-bit words, which is the form the round
  * functions index.  */
 typedef struct
 {
   u16 S[64];
 } RFC2268_context;
 
 /* 256 entry byte substitution table used by setkey_core() while
  * expanding the user key.
  * NOTE(review): this looks like the PITABLE permutation from RFC 2268;
  * confirm against the RFC before touching any value.  */
 static const unsigned char rfc2268_sbox[] = {
   217, 120, 249, 196,  25, 221, 181, 237,
    40, 233, 253, 121,  74, 160, 216, 157,
   198, 126,  55, 131,  43, 118,  83, 142,
    98,  76, 100, 136,  68, 139, 251, 162,
    23, 154,  89, 245, 135, 179,  79,  19,
    97,  69, 109, 141,   9, 129, 125,  50,
   189, 143,  64, 235, 134, 183, 123,  11,
   240, 149,  33,  34,  92, 107,  78, 130,
    84, 214, 101, 147, 206,  96, 178,  28,
   115,  86, 192,  20, 167, 140, 241, 220,
    18, 117, 202,  31,  59, 190, 228, 209,
    66,  61, 212,  48, 163,  60, 182,  38,
   111, 191,  14, 218,  70, 105,   7,  87,
    39, 242,  29, 155, 188, 148,  67,   3,
   248,  17, 199, 246, 144, 239,  62, 231,
     6, 195, 213,  47, 200, 102,  30, 215,
     8, 232, 234, 222, 128,  82, 238, 247,
   132, 170, 114, 172,  53,  77, 106,  42,
   150,  26, 210, 113,  90,  21,  73, 116,
    75, 159, 208,  94,   4,  24, 164, 236,
   194, 224,  65, 110,  15,  81, 203, 204,
    36, 145, 175,  80, 161, 244, 112,  57,
   153, 124,  58, 133,  35, 184, 180, 122,
   252,   2,  54,  91,  37,  85, 151,  49,
    45,  93, 250, 152, 227, 138, 146, 174,
     5, 223,  41,  16, 103, 108, 186, 201,
   211,   0, 230, 207, 225, 158, 168,  44,
    99,  22,   1,  63,  88, 226, 137, 169,
    13,  56,  52,  27, 171,  51, 255, 176,
   187,  72,  12,  95, 185, 177, 205,  46,
   197, 243, 219,  71, 229, 165, 156, 119,
    10, 166,  32, 104, 254, 127, 193, 173
 };
 
 /* Rotate the 16 bit value X left (rotl16) or right (rotr16) by N bits.
  * X is promoted to int before shifting, so the expression carries
  * extra bits above bit 15; the result is a proper 16 bit rotation only
  * once it has been stored back into a u16 (as every caller in this
  * file does).  Both arguments are evaluated twice.  */
 #define rotl16(x,n)   (((x) << ((u16)(n))) | ((x) >> (16 - (u16)(n))))
 #define rotr16(x,n)   (((x) >> ((u16)(n))) | ((x) << (16 - (u16)(n))))
 
 static const char *selftest (void);
 
 
 /* Encrypt the 8 byte block at INBUF into OUTBUF using the expanded key
  * stored in CONTEXT (an RFC2268_context).  All input bytes are read
  * before the first output byte is written, so INBUF and OUTBUF may be
  * the same buffer.  */
 static void
 do_encrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf)
 {
   RFC2268_context *ctx = context;
   const u16 *rk = ctx->S;	/* Four subkeys of the current round.  */
   u16 r0, r1, r2, r3;
   int round;
 
   /* Load the block as four little endian 16 bit words.  */
   r0 = inbuf[0] | (inbuf[1] << 8);
   r1 = inbuf[2] | (inbuf[3] << 8);
   r2 = inbuf[4] | (inbuf[5] << 8);
   r3 = inbuf[6] | (inbuf[7] << 8);
 
   for (round = 0; round < 16; round++, rk += 4)
     {
       /* Mixing round.  The addition and the rotation stay separate
          statements: the sum has to be truncated to 16 bits by the
          store into the u16 before rotl16 may look at it.  */
       r0 += (r1 & ~r3) + (r2 & r3) + rk[0];
       r0 = rotl16(r0, 1);
 
       r1 += (r2 & ~r0) + (r3 & r0) + rk[1];
       r1 = rotl16(r1, 2);
 
       r2 += (r3 & ~r1) + (r0 & r1) + rk[2];
       r2 = rotl16(r2, 3);
 
       r3 += (r0 & ~r2) + (r1 & r2) + rk[3];
       r3 = rotl16(r3, 5);
 
       /* Mashing step, done after the 5th and after the 11th round:
          each word picks a subkey selected by its neighbour.  */
       if (round == 4 || round == 10)
         {
           r0 += ctx->S[r3 & 63];
           r1 += ctx->S[r0 & 63];
           r2 += ctx->S[r1 & 63];
           r3 += ctx->S[r2 & 63];
         }
     }
 
   /* Store the four words back in little endian order.  */
   outbuf[0] = r0 & 255;
   outbuf[1] = r0 >> 8;
   outbuf[2] = r1 & 255;
   outbuf[3] = r1 >> 8;
   outbuf[4] = r2 & 255;
   outbuf[5] = r2 >> 8;
   outbuf[6] = r3 & 255;
   outbuf[7] = r3 >> 8;
 }
 
 /* Cipher-spec encrypt callback: encrypt one block and return the
  * number of stack bytes the caller should burn afterwards.  */
 static unsigned int
 encrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf)
 {
   const unsigned int burn_depth =
     4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4;
 
   do_encrypt (context, outbuf, inbuf);
   return burn_depth;
 }
 
 /* Decrypt the 8 byte block at INBUF into OUTBUF using the expanded key
  * stored in CONTEXT (an RFC2268_context).  This undoes do_encrypt step
  * by step in reverse order.  All input bytes are read before the first
  * output byte is written, so INBUF and OUTBUF may be the same buffer
  * (selftest relies on that).  */
 static void
 do_decrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf)
 {
   RFC2268_context *ctx = context;
   const u16 *rk;		/* Four subkeys of the current round.  */
   u16 r0, r1, r2, r3;
   int round;
 
   /* Load the block as four little endian 16 bit words.  */
   r0 = inbuf[0] | (inbuf[1] << 8);
   r1 = inbuf[2] | (inbuf[3] << 8);
   r2 = inbuf[4] | (inbuf[5] << 8);
   r3 = inbuf[6] | (inbuf[7] << 8);
 
   for (round = 16; round-- > 0; )
     {
       rk = ctx->S + round * 4;
 
       /* Reverse mixing round: rotate back first, then remove the
          value that the encryption added.  */
       r3 = rotr16(r3, 5);
       r3 -= (r0 & ~r2) + (r1 & r2) + rk[3];
 
       r2 = rotr16(r2, 3);
       r2 -= (r3 & ~r1) + (r0 & r1) + rk[2];
 
       r1 = rotr16(r1, 2);
       r1 -= (r2 & ~r0) + (r3 & r0) + rk[1];
 
       r0 = rotr16(r0, 1);
       r0 -= (r1 & ~r3) + (r2 & r3) + rk[0];
 
       /* Reverse mashing step, done right after rounds 11 and 5 have
          been undone.  */
       if (round == 5 || round == 11)
         {
           r3 -= ctx->S[r2 & 63];
           r2 -= ctx->S[r1 & 63];
           r1 -= ctx->S[r0 & 63];
           r0 -= ctx->S[r3 & 63];
         }
     }
 
   /* Store the four words back in little endian order.  */
   outbuf[0] = r0 & 255;
   outbuf[1] = r0 >> 8;
   outbuf[2] = r1 & 255;
   outbuf[3] = r1 >> 8;
   outbuf[4] = r2 & 255;
   outbuf[5] = r2 >> 8;
   outbuf[6] = r3 & 255;
   outbuf[7] = r3 >> 8;
 }
 
 /* Cipher-spec decrypt callback: decrypt one block and return the
  * number of stack bytes the caller should burn afterwards.  */
 static unsigned int
 decrypt_block (void *context, unsigned char *outbuf, const unsigned char *inbuf)
 {
   const unsigned int burn_depth =
     4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 4;
 
   do_decrypt (context, outbuf, inbuf);
   return burn_depth;
 }
 
 
 static gpg_err_code_t
 setkey_core (void *context, const unsigned char *key, unsigned int keylen, int with_phase2)
 {
   static int initialized;
   static const char *selftest_failed;
   RFC2268_context *ctx = context;
   unsigned int i;
   unsigned char *S, x;
   int len;
   int bits = keylen * 8;
 
   if (!initialized)
     {
       initialized = 1;
       selftest_failed = selftest ();
       if (selftest_failed)
         log_error ("RFC2268 selftest failed (%s).\n", selftest_failed);
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   if (keylen < 40 / 8)	/* We want at least 40 bits. */
     return GPG_ERR_INV_KEYLEN;
 
   if (keylen > 128)
     return GPG_ERR_INV_KEYLEN;
 
   S = (unsigned char *) ctx->S;
 
   for (i = 0; i < keylen; i++)
     S[i] = key[i];
 
   for (i = keylen; i < 128; i++)
     S[i] = rfc2268_sbox[(S[i - keylen] + S[i - 1]) & 255];
 
   S[0] = rfc2268_sbox[S[0]];
 
   /* Phase 2 - reduce effective key size to "bits". This was not
    * discussed in Gutmann's paper. I've copied that from the public
    * domain code posted in sci.crypt. */
   if (with_phase2)
     {
       len = (bits + 7) >> 3;
       i = 128 - len;
       x = rfc2268_sbox[S[i] & (255 >> (7 & -bits))];
       S[i] = x;
 
       while (i--)
         {
           x = rfc2268_sbox[x ^ S[i + len]];
           S[i] = x;
         }
     }
 
   /* Make the expanded key, endian independent. */
   for (i = 0; i < 64; i++)
     ctx->S[i] = ( (u16) S[i * 2] | (((u16) S[i * 2 + 1]) << 8));
 
   return 0;
 }
 
 /* Cipher-spec setkey callback.  BULK_OPS is not used by this cipher;
  * the key is always expanded with the phase 2 pass enabled.  */
 static gpg_err_code_t
 do_setkey (void *context, const unsigned char *key, unsigned int keylen,
            cipher_bulk_ops_t *bulk_ops)
 {
   const int with_phase2 = 1;
 
   (void)bulk_ops;
   return setkey_core (context, key, keylen, with_phase2);
 }
 
 /* Run three known-answer tests (encrypt, then decrypt in place) and
  * return NULL on success or a static string naming the first failing
  * test.  All keys are expanded without the phase 2 pass.  The vectors
  * are read-only and therefore all declared const.  */
 static const char *
 selftest (void)
 {
   RFC2268_context ctx;
   unsigned char scratch[16];
 
   /* Test vectors from Peter Gutmann's paper. */
   static const unsigned char key_1[] =
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     };
   static const unsigned char plaintext_1[] =
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
   static const unsigned char ciphertext_1[] =
     { 0x1C, 0x19, 0x8A, 0x83, 0x8D, 0xF0, 0x28, 0xB7 };
 
   static const unsigned char key_2[] =
     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
       0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
     };
   static const unsigned char plaintext_2[] =
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
   static const unsigned char ciphertext_2[] =
     { 0x50, 0xDC, 0x01, 0x62, 0xBD, 0x75, 0x7F, 0x31 };
 
   /* This one was checked against libmcrypt's RFC2268. */
   static const unsigned char key_3[] =
     { 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     };
   static const unsigned char plaintext_3[] =
     { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
   static const unsigned char ciphertext_3[] =
     { 0x8f, 0xd1, 0x03, 0x89, 0x33, 0x6b, 0xf9, 0x5e };
 
 
   /* First test. */
   setkey_core (&ctx, key_1, sizeof(key_1), 0);
   do_encrypt (&ctx, scratch, plaintext_1);
   if (memcmp (scratch, ciphertext_1, sizeof(ciphertext_1)))
     return "RFC2268 encryption test 1 failed.";
 
   /* Decrypt in place; do_decrypt reads its input before writing.  */
   setkey_core (&ctx, key_1, sizeof(key_1), 0);
   do_decrypt (&ctx, scratch, scratch);
   if (memcmp (scratch, plaintext_1, sizeof(plaintext_1)))
     return "RFC2268 decryption test 1 failed.";
 
   /* Second test. */
   setkey_core (&ctx, key_2, sizeof(key_2), 0);
   do_encrypt (&ctx, scratch, plaintext_2);
   if (memcmp (scratch, ciphertext_2, sizeof(ciphertext_2)))
     return "RFC2268 encryption test 2 failed.";
 
   setkey_core (&ctx, key_2, sizeof(key_2), 0);
   do_decrypt (&ctx, scratch, scratch);
   if (memcmp (scratch, plaintext_2, sizeof(plaintext_2)))
     return "RFC2268 decryption test 2 failed.";
 
   /* Third test. */
   setkey_core (&ctx, key_3, sizeof(key_3), 0);
   do_encrypt (&ctx, scratch, plaintext_3);
   if (memcmp (scratch, ciphertext_3, sizeof(ciphertext_3)))
     return "RFC2268 encryption test 3 failed.";
 
   setkey_core (&ctx, key_3, sizeof(key_3), 0);
   do_decrypt (&ctx, scratch, scratch);
   if (memcmp (scratch, plaintext_3, sizeof(plaintext_3)))
     return "RFC2268 decryption test 3 failed.";
 
   return NULL;
 }
 
 
 
 /* OID tables referenced by the two cipher descriptors defined after
  * them.  Each table is terminated by a { NULL } entry and lists one
  * PKCS#12 PBE object identifier bound to CBC mode.  */
 static const gcry_cipher_oid_spec_t oids_rfc2268_40[] =
   {
     /* The generic RC2 OID is deliberately left disabled here.  */
     /*{ "1.2.840.113549.3.2", GCRY_CIPHER_MODE_CBC },*/
     /* pbeWithSHAAnd40BitRC2_CBC */
     { "1.2.840.113549.1.12.1.6", GCRY_CIPHER_MODE_CBC },
     { NULL }
   };
 
 static const gcry_cipher_oid_spec_t oids_rfc2268_128[] =
   {
     /* pbeWithSHAAnd128BitRC2_CBC */
     { "1.2.840.113549.1.12.1.5", GCRY_CIPHER_MODE_CBC },
     { NULL }
   };
 
 /* Cipher descriptors for the two registered RFC 2268 variants.  Both
  * share the same block size, context size and setkey/encrypt/decrypt
  * callbacks; they differ in algorithm id, name, OID table and in the
  * 40 vs. 128 value.
  * NOTE(review): that value is presumably the nominal key length in
  * bits - confirm against the gcry_cipher_spec_t definition.  */
 gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 =
   {
     GCRY_CIPHER_RFC2268_40, {0, 0},
     "RFC2268_40", NULL, oids_rfc2268_40,
     RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context),
     do_setkey, encrypt_block, decrypt_block
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128 =
   {
     GCRY_CIPHER_RFC2268_128, {0, 0},
     "RFC2268_128", NULL, oids_rfc2268_128,
     RFC2268_BLOCKSIZE, 128, sizeof(RFC2268_context),
     do_setkey, encrypt_block, decrypt_block
   };
diff --git a/cipher/rmd160.c b/cipher/rmd160.c
index 5c54fdff..6c03d4c7 100644
--- a/cipher/rmd160.c
+++ b/cipher/rmd160.c
@@ -1,520 +1,520 @@
 /* rmd160.c  -	RIPE-MD160
  * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 #include "hash-common.h"
 #include "cipher.h" /* Only used for the rmd160_hash_buffer() prototype. */
 
 #include "bithelp.h"
 #include "bufhelp.h"
 
 /*********************************
  * RIPEMD-160 is not patented, see (as of 25.10.97)
  *   http://www.esat.kuleuven.ac.be/~bosselae/ripemd160.html
  * Note that the code uses Little Endian byteorder, which is good for
  * 386 etc, but we must add some conversion when used on a big endian box.
  *
  *
  * Pseudo-code for RIPEMD-160
  *
  * RIPEMD-160 is an iterative hash function that operates on 32-bit words.
  * The round function takes as input a 5-word chaining variable and a 16-word
  * message block and maps this to a new chaining variable. All operations are
  * defined on 32-bit words. Padding is identical to that of MD4.
  *
  *
  * RIPEMD-160: definitions
  *
  *
  *   nonlinear functions at bit level: exor, mux, -, mux, -
  *
  *   f(j, x, y, z) = x XOR y XOR z		  (0 <= j <= 15)
  *   f(j, x, y, z) = (x AND y) OR (NOT(x) AND z)  (16 <= j <= 31)
  *   f(j, x, y, z) = (x OR NOT(y)) XOR z	  (32 <= j <= 47)
  *   f(j, x, y, z) = (x AND z) OR (y AND NOT(z))  (48 <= j <= 63)
  *   f(j, x, y, z) = x XOR (y OR NOT(z))	  (64 <= j <= 79)
  *
  *
  *   added constants (hexadecimal)
  *
  *   K(j) = 0x00000000	    (0 <= j <= 15)
  *   K(j) = 0x5A827999	   (16 <= j <= 31)	int(2**30 x sqrt(2))
  *   K(j) = 0x6ED9EBA1	   (32 <= j <= 47)	int(2**30 x sqrt(3))
  *   K(j) = 0x8F1BBCDC	   (48 <= j <= 63)	int(2**30 x sqrt(5))
  *   K(j) = 0xA953FD4E	   (64 <= j <= 79)	int(2**30 x sqrt(7))
  *   K'(j) = 0x50A28BE6     (0 <= j <= 15)      int(2**30 x cbrt(2))
  *   K'(j) = 0x5C4DD124    (16 <= j <= 31)      int(2**30 x cbrt(3))
  *   K'(j) = 0x6D703EF3    (32 <= j <= 47)      int(2**30 x cbrt(5))
  *   K'(j) = 0x7A6D76E9    (48 <= j <= 63)      int(2**30 x cbrt(7))
  *   K'(j) = 0x00000000    (64 <= j <= 79)
  *
  *
  *   selection of message word
  *
  *   r(j)      = j		      (0 <= j <= 15)
  *   r(16..31) = 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8
  *   r(32..47) = 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12
  *   r(48..63) = 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2
  *   r(64..79) = 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13
  *   r'(0..15) = 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12
  *   r'(16..31)= 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2
  *   r'(32..47)= 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13
  *   r'(48..63)= 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14
  *   r'(64..79)= 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11
  *
  *
  *   amount for rotate left (rol)
  *
  *   s(0..15)  = 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8
  *   s(16..31) = 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12
  *   s(32..47) = 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5
  *   s(48..63) = 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12
  *   s(64..79) = 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6
  *   s'(0..15) = 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6
  *   s'(16..31)= 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11
  *   s'(32..47)= 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5
  *   s'(48..63)= 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8
  *   s'(64..79)= 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
  *
  *
  *   initial value (hexadecimal)
  *
  *   h0 = 0x67452301; h1 = 0xEFCDAB89; h2 = 0x98BADCFE; h3 = 0x10325476;
  *							h4 = 0xC3D2E1F0;
  *
  *
  * RIPEMD-160: pseudo-code
  *
  *   It is assumed that the message after padding consists of t 16-word blocks
  *   that will be denoted with X[i][j], with 0 <= i <= t-1 and 0 <= j <= 15.
  *   The symbol [+] denotes addition modulo 2**32 and rol_s denotes cyclic left
  *   shift (rotate) over s positions.
  *
  *
  *   for i := 0 to t-1 {
  *	 A := h0; B := h1; C := h2; D := h3; E := h4;
  *	 A' := h0; B' := h1; C' := h2; D' := h3; E' := h4;
  *	 for j := 0 to 79 {
  *	     T := rol_s(j)(A [+] f(j, B, C, D) [+] X[i][r(j)] [+] K(j)) [+] E;
  *	     A := E; E := D; D := rol_10(C); C := B; B := T;
  *	     T := rol_s'(j)(A' [+] f(79-j, B', C', D') [+] X[i][r'(j)]
  *						       [+] K'(j)) [+] E';
  *	     A' := E'; E' := D'; D' := rol_10(C'); C' := B'; B' := T;
  *	 }
  *	 T := h1 [+] C [+] D'; h1 := h2 [+] D [+] E'; h2 := h3 [+] E [+] A';
  *	 h3 := h4 [+] A [+] B'; h4 := h0 [+] B [+] C'; h0 := T;
  *   }
  */
 
 /* Some examples:
  * ""                    9c1185a5c5e9fc54612808977ee8f548b2258d31
  * "a"                   0bdc9d2d256b3ee9daae347be6f4dc835a467ffe
  * "abc"                 8eb208f7e05d987a9b044a8e98c6b087f15a0bfc
  * "message digest"      5d0689ef49d2fae572b881b123a85ffa21595f36
  * "a...z"               f71c27109c692c1b56bbdceb5b9d2865b3708dbc
  * "abcdbcde...nopq"     12a053384a9c0c88e405a06c27dcf49ada62eb2b
  * "A...Za...z0...9"     b0e20b6e3116640286ed3a87a5713079b21f5189
  * 8 times "1234567890"  9b752e45573d4b39f4dbd3323cab82bf63326bfb
  * 1 million times "a"   52783243c1697bdbe16d37f97f68f08325dc1528
  */
 
 /* Hash context.  BCTX is the generic block-buffering state; it must
  * stay the first member because the context pointer is handed
  * unchanged to _gcry_md_block_write (see _gcry_rmd160_hash_buffers).
  * h0..h4 are the five 32 bit chaining words set up by rmd160_init and
  * updated by transform_blk.  */
 typedef struct
 {
   gcry_md_block_ctx_t bctx;
   u32  h0,h1,h2,h3,h4;
 } RMD160_CONTEXT;
 
 
 static unsigned int
 transform ( void *ctx, const unsigned char *data, size_t nblks );
 
 /* Reset CONTEXT (an RMD160_CONTEXT) for a new hash computation.
  * FLAGS is accepted for interface compatibility and ignored.  */
 static void
 rmd160_init (void *context, unsigned int flags)
 {
   RMD160_CONTEXT *hd = context;
 
   (void)flags;
 
   /* Block buffering: nothing hashed yet, 64 byte blocks, and
      transform() as the routine that consumes full blocks.  */
   hd->bctx.bwrite = transform;
   hd->bctx.blocksize_shift = _gcry_ctz(64);
   hd->bctx.count = 0;
   hd->bctx.nblocks_high = 0;
   hd->bctx.nblocks = 0;
 
   /* Initial chaining value.  */
   hd->h4 = 0xC3D2E1F0;
   hd->h3 = 0x10325476;
   hd->h2 = 0x98BADCFE;
   hd->h1 = 0xEFCDAB89;
   hd->h0 = 0x67452301;
 }
 
 
 /****************
  * Transform the message X which consists of 16 32-bit-words
  *
  * CTX is the RMD160_CONTEXT whose chaining words h0..h4 are updated
  * in place; DATA points to one 64 byte block which is read as 16
  * little endian 32 bit words.  Returns the number of stack bytes the
  * caller should burn.
  */
 static unsigned int
 transform_blk ( void *ctx, const unsigned char *data )
 {
   RMD160_CONTEXT *hd = ctx;
   register u32 al, ar, bl, br, cl, cr, dl, dr, el, er;
   u32 x[16];
   int i;
 
   /* Load the message block; buf_get_le32 allows unaligned DATA
      (presumably - verify against bufhelp.h).  */
   for ( i = 0; i < 16; i++ )
     x[i] = buf_get_le32(data + i * 4);
 
   /* Round constants: Kn for the left lane, KKn for the right lane.
      These macros and the F/R helpers below are not #undef'ed and stay
      defined for the remainder of the file.  */
 #define K0  0x00000000
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
 #define K4  0xA953FD4E
 #define KK0 0x50A28BE6
 #define KK1 0x5C4DD124
 #define KK2 0x6D703EF3
 #define KK3 0x7A6D76E9
 #define KK4 0x00000000
   /* The five nonlinear bit functions.  */
 #define F0(x,y,z)   ( (x) ^ (y) ^ (z) )
 #define F1(x,y,z)   ( ((x) & (y)) | (~(x) & (z)) )
 #define F2(x,y,z)   ( ((x) | ~(y)) ^ (z) )
 #define F3(x,y,z)   ( ((x) & (z)) | ((y) & ~(z)) )
 #define F4(x,y,z)   ( (x) ^ ((y) | ~(z)) )
   /* One step: a = rol(a + f(b,c,d) + k + x[r], s) + e; c = rol(c, 10).
      Instead of shuffling the five variables after each step, the
      callers below rotate the argument order from line to line.  */
 #define R(a,b,c,d,e,f,k,r,s) do { a += f(b,c,d) + k + x[r]; \
 				  a = rol(a,s) + e;	       \
 				  c = rol(c,10);	       \
 				} while(0)
 
   /* left lane and right lanes interleaved */
   /* Lines alternate: *l variables are the left lane, *r variables the
      right lane.  The left lane uses F0..F4 with K0..K4, the right
      lane F4..F0 with KK0..KK4; the function/constant pair changes
      every 16 steps per lane.  The last two arguments are the message
      word index and the rotate amount.  */
   al = ar = hd->h0;
   bl = br = hd->h1;
   cl = cr = hd->h2;
   dl = dr = hd->h3;
   el = er = hd->h4;
   R( al, bl, cl, dl, el, F0, K0,  0, 11 );
   R( ar, br, cr, dr, er, F4, KK0,	5,  8);
   R( el, al, bl, cl, dl, F0, K0,  1, 14 );
   R( er, ar, br, cr, dr, F4, KK0, 14,  9);
   R( dl, el, al, bl, cl, F0, K0,  2, 15 );
   R( dr, er, ar, br, cr, F4, KK0,	7,  9);
   R( cl, dl, el, al, bl, F0, K0,  3, 12 );
   R( cr, dr, er, ar, br, F4, KK0,	0, 11);
   R( bl, cl, dl, el, al, F0, K0,  4,  5 );
   R( br, cr, dr, er, ar, F4, KK0,	9, 13);
   R( al, bl, cl, dl, el, F0, K0,  5,  8 );
   R( ar, br, cr, dr, er, F4, KK0,	2, 15);
   R( el, al, bl, cl, dl, F0, K0,  6,  7 );
   R( er, ar, br, cr, dr, F4, KK0, 11, 15);
   R( dl, el, al, bl, cl, F0, K0,  7,  9 );
   R( dr, er, ar, br, cr, F4, KK0,	4,  5);
   R( cl, dl, el, al, bl, F0, K0,  8, 11 );
   R( cr, dr, er, ar, br, F4, KK0, 13,  7);
   R( bl, cl, dl, el, al, F0, K0,  9, 13 );
   R( br, cr, dr, er, ar, F4, KK0,	6,  7);
   R( al, bl, cl, dl, el, F0, K0, 10, 14 );
   R( ar, br, cr, dr, er, F4, KK0, 15,  8);
   R( el, al, bl, cl, dl, F0, K0, 11, 15 );
   R( er, ar, br, cr, dr, F4, KK0,	8, 11);
   R( dl, el, al, bl, cl, F0, K0, 12,  6 );
   R( dr, er, ar, br, cr, F4, KK0,	1, 14);
   R( cl, dl, el, al, bl, F0, K0, 13,  7 );
   R( cr, dr, er, ar, br, F4, KK0, 10, 14);
   R( bl, cl, dl, el, al, F0, K0, 14,  9 );
   R( br, cr, dr, er, ar, F4, KK0,	3, 12);
   R( al, bl, cl, dl, el, F0, K0, 15,  8 );
   R( ar, br, cr, dr, er, F4, KK0, 12,  6);
   R( el, al, bl, cl, dl, F1, K1,  7,  7 );
   R( er, ar, br, cr, dr, F3, KK1,	6,  9);
   R( dl, el, al, bl, cl, F1, K1,  4,  6 );
   R( dr, er, ar, br, cr, F3, KK1, 11, 13);
   R( cl, dl, el, al, bl, F1, K1, 13,  8 );
   R( cr, dr, er, ar, br, F3, KK1,	3, 15);
   R( bl, cl, dl, el, al, F1, K1,  1, 13 );
   R( br, cr, dr, er, ar, F3, KK1,	7,  7);
   R( al, bl, cl, dl, el, F1, K1, 10, 11 );
   R( ar, br, cr, dr, er, F3, KK1,	0, 12);
   R( el, al, bl, cl, dl, F1, K1,  6,  9 );
   R( er, ar, br, cr, dr, F3, KK1, 13,  8);
   R( dl, el, al, bl, cl, F1, K1, 15,  7 );
   R( dr, er, ar, br, cr, F3, KK1,	5,  9);
   R( cl, dl, el, al, bl, F1, K1,  3, 15 );
   R( cr, dr, er, ar, br, F3, KK1, 10, 11);
   R( bl, cl, dl, el, al, F1, K1, 12,  7 );
   R( br, cr, dr, er, ar, F3, KK1, 14,  7);
   R( al, bl, cl, dl, el, F1, K1,  0, 12 );
   R( ar, br, cr, dr, er, F3, KK1, 15,  7);
   R( el, al, bl, cl, dl, F1, K1,  9, 15 );
   R( er, ar, br, cr, dr, F3, KK1,	8, 12);
   R( dl, el, al, bl, cl, F1, K1,  5,  9 );
   R( dr, er, ar, br, cr, F3, KK1, 12,  7);
   R( cl, dl, el, al, bl, F1, K1,  2, 11 );
   R( cr, dr, er, ar, br, F3, KK1,	4,  6);
   R( bl, cl, dl, el, al, F1, K1, 14,  7 );
   R( br, cr, dr, er, ar, F3, KK1,	9, 15);
   R( al, bl, cl, dl, el, F1, K1, 11, 13 );
   R( ar, br, cr, dr, er, F3, KK1,	1, 13);
   R( el, al, bl, cl, dl, F1, K1,  8, 12 );
   R( er, ar, br, cr, dr, F3, KK1,	2, 11);
   R( dl, el, al, bl, cl, F2, K2,  3, 11 );
   R( dr, er, ar, br, cr, F2, KK2, 15,  9);
   R( cl, dl, el, al, bl, F2, K2, 10, 13 );
   R( cr, dr, er, ar, br, F2, KK2,	5,  7);
   R( bl, cl, dl, el, al, F2, K2, 14,  6 );
   R( br, cr, dr, er, ar, F2, KK2,	1, 15);
   R( al, bl, cl, dl, el, F2, K2,  4,  7 );
   R( ar, br, cr, dr, er, F2, KK2,	3, 11);
   R( el, al, bl, cl, dl, F2, K2,  9, 14 );
   R( er, ar, br, cr, dr, F2, KK2,	7,  8);
   R( dl, el, al, bl, cl, F2, K2, 15,  9 );
   R( dr, er, ar, br, cr, F2, KK2, 14,  6);
   R( cl, dl, el, al, bl, F2, K2,  8, 13 );
   R( cr, dr, er, ar, br, F2, KK2,	6,  6);
   R( bl, cl, dl, el, al, F2, K2,  1, 15 );
   R( br, cr, dr, er, ar, F2, KK2,	9, 14);
   R( al, bl, cl, dl, el, F2, K2,  2, 14 );
   R( ar, br, cr, dr, er, F2, KK2, 11, 12);
   R( el, al, bl, cl, dl, F2, K2,  7,  8 );
   R( er, ar, br, cr, dr, F2, KK2,	8, 13);
   R( dl, el, al, bl, cl, F2, K2,  0, 13 );
   R( dr, er, ar, br, cr, F2, KK2, 12,  5);
   R( cl, dl, el, al, bl, F2, K2,  6,  6 );
   R( cr, dr, er, ar, br, F2, KK2,	2, 14);
   R( bl, cl, dl, el, al, F2, K2, 13,  5 );
   R( br, cr, dr, er, ar, F2, KK2, 10, 13);
   R( al, bl, cl, dl, el, F2, K2, 11, 12 );
   R( ar, br, cr, dr, er, F2, KK2,	0, 13);
   R( el, al, bl, cl, dl, F2, K2,  5,  7 );
   R( er, ar, br, cr, dr, F2, KK2,	4,  7);
   R( dl, el, al, bl, cl, F2, K2, 12,  5 );
   R( dr, er, ar, br, cr, F2, KK2, 13,  5);
   R( cl, dl, el, al, bl, F3, K3,  1, 11 );
   R( cr, dr, er, ar, br, F1, KK3,	8, 15);
   R( bl, cl, dl, el, al, F3, K3,  9, 12 );
   R( br, cr, dr, er, ar, F1, KK3,	6,  5);
   R( al, bl, cl, dl, el, F3, K3, 11, 14 );
   R( ar, br, cr, dr, er, F1, KK3,	4,  8);
   R( el, al, bl, cl, dl, F3, K3, 10, 15 );
   R( er, ar, br, cr, dr, F1, KK3,	1, 11);
   R( dl, el, al, bl, cl, F3, K3,  0, 14 );
   R( dr, er, ar, br, cr, F1, KK3,	3, 14);
   R( cl, dl, el, al, bl, F3, K3,  8, 15 );
   R( cr, dr, er, ar, br, F1, KK3, 11, 14);
   R( bl, cl, dl, el, al, F3, K3, 12,  9 );
   R( br, cr, dr, er, ar, F1, KK3, 15,  6);
   R( al, bl, cl, dl, el, F3, K3,  4,  8 );
   R( ar, br, cr, dr, er, F1, KK3,	0, 14);
   R( el, al, bl, cl, dl, F3, K3, 13,  9 );
   R( er, ar, br, cr, dr, F1, KK3,	5,  6);
   R( dl, el, al, bl, cl, F3, K3,  3, 14 );
   R( dr, er, ar, br, cr, F1, KK3, 12,  9);
   R( cl, dl, el, al, bl, F3, K3,  7,  5 );
   R( cr, dr, er, ar, br, F1, KK3,	2, 12);
   R( bl, cl, dl, el, al, F3, K3, 15,  6 );
   R( br, cr, dr, er, ar, F1, KK3, 13,  9);
   R( al, bl, cl, dl, el, F3, K3, 14,  8 );
   R( ar, br, cr, dr, er, F1, KK3,	9, 12);
   R( el, al, bl, cl, dl, F3, K3,  5,  6 );
   R( er, ar, br, cr, dr, F1, KK3,	7,  5);
   R( dl, el, al, bl, cl, F3, K3,  6,  5 );
   R( dr, er, ar, br, cr, F1, KK3, 10, 15);
   R( cl, dl, el, al, bl, F3, K3,  2, 12 );
   R( cr, dr, er, ar, br, F1, KK3, 14,  8);
   R( bl, cl, dl, el, al, F4, K4,  4,  9 );
   R( br, cr, dr, er, ar, F0, KK4, 12,  8);
   R( al, bl, cl, dl, el, F4, K4,  0, 15 );
   R( ar, br, cr, dr, er, F0, KK4, 15,  5);
   R( el, al, bl, cl, dl, F4, K4,  5,  5 );
   R( er, ar, br, cr, dr, F0, KK4, 10, 12);
   R( dl, el, al, bl, cl, F4, K4,  9, 11 );
   R( dr, er, ar, br, cr, F0, KK4,	4,  9);
   R( cl, dl, el, al, bl, F4, K4,  7,  6 );
   R( cr, dr, er, ar, br, F0, KK4,	1, 12);
   R( bl, cl, dl, el, al, F4, K4, 12,  8 );
   R( br, cr, dr, er, ar, F0, KK4,	5,  5);
   R( al, bl, cl, dl, el, F4, K4,  2, 13 );
   R( ar, br, cr, dr, er, F0, KK4,	8, 14);
   R( el, al, bl, cl, dl, F4, K4, 10, 12 );
   R( er, ar, br, cr, dr, F0, KK4,	7,  6);
   R( dl, el, al, bl, cl, F4, K4, 14,  5 );
   R( dr, er, ar, br, cr, F0, KK4,	6,  8);
   R( cl, dl, el, al, bl, F4, K4,  1, 12 );
   R( cr, dr, er, ar, br, F0, KK4,	2, 13);
   R( bl, cl, dl, el, al, F4, K4,  3, 13 );
   R( br, cr, dr, er, ar, F0, KK4, 13,  6);
   R( al, bl, cl, dl, el, F4, K4,  8, 14 );
   R( ar, br, cr, dr, er, F0, KK4, 14,  5);
   R( el, al, bl, cl, dl, F4, K4, 11, 11 );
   R( er, ar, br, cr, dr, F0, KK4,	0, 15);
   R( dl, el, al, bl, cl, F4, K4,  6,  8 );
   R( dr, er, ar, br, cr, F0, KK4,	3, 13);
   R( cl, dl, el, al, bl, F4, K4, 15,  5 );
   R( cr, dr, er, ar, br, F0, KK4,	9, 11);
   R( bl, cl, dl, el, al, F4, K4, 13,  6 );
   R( br, cr, dr, er, ar, F0, KK4, 11, 11);
 
   /* Combine both lanes with the old chaining value.  DR doubles as the
      temporary for the new h0 because the old h0 is still needed for
      the new h4.  */
   dr += cl + hd->h1;
   hd->h1 = hd->h2 + dl + er;
   hd->h2 = hd->h3 + el + ar;
   hd->h3 = hd->h4 + al + br;
   hd->h4 = hd->h0 + bl + cr;
   hd->h0 = dr;
 
   return /*burn_stack*/ 104+5*sizeof(void*);
 }
 
 
 /* Process NBLKS consecutive 64 byte blocks starting at DATA with the
  * context C and return the stack burn depth reported for the last
  * block.  A count of zero processes nothing and returns 0; the former
  * do/while form would have hashed one block and then wrapped the
  * counter, running far past the end of the buffer.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks )
 {
   unsigned int burn = 0;
 
   for (; nblks; nblks--, data += 64)
     burn = transform_blk (c, data);
 
   return burn;
 }
 
 
 /*
  * Terminate the computation: append the padding and the 64 bit message
  * length in bits, run the final block(s) and store the 20 byte digest,
  * little endian word by word, at the start of the context buffer.
  */
 static void
 rmd160_final( void *context )
 {
   RMD160_CONTEXT *hd = context;
   u32 nblk_lo, nblk_hi, len_lo, len_hi, before;
   byte *out;
   unsigned int burn;
   int one_block;
 
   /* Fetch the block counter as two 32 bit halves, whatever the width
      of the nblocks field is.  */
   nblk_lo = hd->bctx.nblocks;
   if (sizeof nblk_lo == sizeof hd->bctx.nblocks)
     nblk_hi = hd->bctx.nblocks_high;
   else
     nblk_hi = hd->bctx.nblocks >> 32;
 
   /* Bytes in full blocks: multiply the counter by 64.  */
   len_lo = nblk_lo << 6;
   len_hi = (nblk_hi << 6) | (nblk_lo >> 26);
 
   /* Add the bytes still sitting in the buffer, with carry.  */
   before = len_lo;
   len_lo += hd->bctx.count;
   if (len_lo < before)
     len_hi++;
 
   /* Convert the byte count into a bit count: multiply by 8.  */
   len_hi = (len_hi << 3) | (len_lo >> 29);
   len_lo <<= 3;
 
   /* The length field fits into the current block only if fewer than
      56 bytes are pending before the pad byte is added.  */
   one_block = hd->bctx.count < 56;
   hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
 
   if (one_block)
     {
       if (hd->bctx.count < 56)
         memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 56, len_lo);
       buf_put_le32(hd->bctx.buf + 60, len_hi);
       burn = transform (hd, hd->bctx.buf, 1);
     }
   else
     {
       /* Need one extra block: zero the rest of this block and the
          first 56 bytes of the next one.  */
       memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 64 + 56, len_lo);
       buf_put_le32(hd->bctx.buf + 64 + 60, len_hi);
       burn = transform (hd, hd->bctx.buf, 2);
     }
 
   /* Write the digest to the start of the buffer, where rmd160_read
      picks it up.  */
   out = hd->bctx.buf;
   buf_put_le32(out, hd->h0);
   buf_put_le32(out + 4, hd->h1);
   buf_put_le32(out + 8, hd->h2);
   buf_put_le32(out + 12, hd->h3);
   buf_put_le32(out + 16, hd->h4);
 
   hd->bctx.count = 0;
 
   _gcry_burn_stack (burn);
 }
 
 static byte *
 rmd160_read( void *context )
 {
   RMD160_CONTEXT *hd = context;
 
   return hd->bctx.buf;
 }
 
 
 
 /****************
  * One-shot helper: hash the IOVCNT buffer descriptors in IOV, in
  * order, and copy the 20 byte digest to OUTBUF, which must have room
  * for 20 bytes.  NBYTES is ignored.
  */
 static void
 _gcry_rmd160_hash_buffers (void *outbuf, size_t nbytes,
 			   const gcry_buffer_t *iov, int iovcnt)
 {
   RMD160_CONTEXT hd;
   int idx;
 
   (void)nbytes;
 
   rmd160_init (&hd, 0);
 
   for (idx = 0; idx < iovcnt; idx++)
     {
       /* Each descriptor names LEN bytes starting OFF bytes into DATA.  */
       const char *chunk = (const char*)iov[idx].data + iov[idx].off;
 
       _gcry_md_block_write (&hd, chunk, iov[idx].len);
     }
 
   rmd160_final ( &hd );
   memcpy ( outbuf, hd.bctx.buf, 20 );
 }
 
 
 /* DER header that precedes the raw digest in a DigestInfo structure:
  *   30 21        SEQUENCE, 33 bytes
  *   30 09          SEQUENCE, 9 bytes (algorithm identifier)
  *   06 05 ...        OBJECT IDENTIFIER 1.3.36.3.2.1
  *   05 00            NULL parameters
  *   04 14          OCTET STRING, 20 bytes (the digest follows)
  */
 static const byte asn[15] = /* Object ID is 1.3.36.3.2.1 */
   { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x24, 0x03,
     0x02, 0x01, 0x05, 0x00, 0x04, 0x14 };
 
 /* Object identifiers under which this digest is registered; the list
  * ends with a { NULL } entry.  */
 static const gcry_md_oid_spec_t oid_spec_rmd160[] =
   {
     /* rsaSignatureWithripemd160 */
     { "1.3.36.3.3.1.2" },
     /* TeleTrust hash algorithm.  */
     { "1.3.36.3.2.1" },
     { NULL }
   };
 
 /* Digest descriptor tying the functions of this file together.
  * NOTE(review): the literal 20 is presumably the digest length in
  * bytes and the NULL slot an unused optional callback - confirm
  * against the gcry_md_spec_t definition.  */
 const gcry_md_spec_t _gcry_digest_spec_rmd160 =
   {
     GCRY_MD_RMD160, {0, 0},
     "RIPEMD160", asn, DIM (asn), oid_spec_rmd160, 20,
     rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, NULL,
     _gcry_rmd160_hash_buffers,
     sizeof (RMD160_CONTEXT)
   };
diff --git a/cipher/seed.c b/cipher/seed.c
index 4fd93d75..9cc34f9e 100644
--- a/cipher/seed.c
+++ b/cipher/seed.c
@@ -1,478 +1,478 @@
 /* SEED for libgcrypt
  *	Copyright (C) 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * --
  * This implementation was provided for libgcrypt in public domain
  * by Hye-Shik Chang <perky@FreeBSD.org>, July 2006.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 
 /* Number of key schedule constants (entries of KC[]); do_setkey runs
  * one iteration per constant.  */
 #define NUMKC	16
 
 /* Load / store a 32 bit word in big-endian byte order, using the
  * helpers from bufhelp.h.  */
 #define GETU32(pt) buf_get_be32(pt)
 #define PUTU32(ct, st) buf_put_be32(ct, st)
 
 /* A 32 bit word that can also be addressed byte-wise; the bytes are
  * used as indices into the SS0..SS3 tables.  */
 union wordbuf
 {
   u32 w;
   byte b[4];
 };
 
 /* Endian-independent byte selectors for union wordbuf: b0 names the
  * least significant byte of W and b3 the most significant one.  On
  * big-endian hosts the least significant byte is stored last (b[3]),
  * on little-endian hosts first (b[0]).  */
 #ifdef WORDS_BIGENDIAN
 #define b0  b[3]
 #define b1  b[2]
 #define b2  b[1]
 #define b3  b[0]
 #else
 #define b0  b[0]
 #define b1  b[1]
 #define b2  b[2]
 #define b3  b[3]
 #endif
 
 /* Forward declaration; do_setkey runs the selftest on its first call.  */
 static const char *selftest(void);
 
 /* Per-key cipher state: the expanded key.  do_setkey stores two words
  * per KC[] constant (2 * NUMKC = 32 words) and each OP() round reads
  * two consecutive words.  */
 typedef struct
 {
   u32 keyschedule[32];
 } SEED_context;
 
 static const u32 SS0[256] = {
     0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c,
     0x2c8ca0ac, 0x25052124, 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c,
     0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, 0x28082028, 0x04444044,
     0x20002020, 0x1d8d919c, 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314,
     0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378, 0x3b8bb3b8, 0x13031310,
     0x12c2d2d0, 0x2ecee2ec, 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8,
     0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074, 0x2ccce0ec, 0x15859194,
     0x0b0b0308, 0x17475354, 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100,
     0x24042024, 0x1c0c101c, 0x33437370, 0x18889098, 0x10001010, 0x0cccc0cc,
     0x32c2f2f0, 0x19c9d1d8, 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380,
     0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8, 0x20406060, 0x10405050,
     0x2383a3a0, 0x2bcbe3e8, 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c,
     0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078, 0x2686a2a4, 0x12021210,
     0x2f8fa3ac, 0x15c5d1d4, 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140,
     0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008, 0x1f0f131c, 0x19899198,
     0x00000000, 0x19091118, 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0,
     0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324, 0x3080b0b0, 0x0b8b8388,
     0x0e0e020c, 0x2b8ba3a8, 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c,
     0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208, 0x3f8fb3bc, 0x2fcfe3ec,
     0x33c3f3f0, 0x05c5c1c4, 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064,
     0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218, 0x06060204, 0x21012120,
     0x2b4b6368, 0x26466264, 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288,
     0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0, 0x3a4a7278, 0x07474344,
     0x16869294, 0x25c5e1e4, 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc,
     0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac, 0x36063234, 0x15051114,
     0x22022220, 0x38083038, 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c,
     0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394, 0x35053134, 0x0bcbc3c8,
     0x0ecec2cc, 0x3c0c303c, 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188,
     0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8, 0x14849094, 0x19495158,
     0x02828280, 0x04c4c0c4, 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364,
     0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8, 0x0f0f030c, 0x0e8e828c,
     0x02424240, 0x23032320, 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4,
     0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0, 0x2f4f636c, 0x3d0d313c,
     0x2d0d212c, 0x00404040, 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0,
     0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154, 0x3b0b3338, 0x1cccd0dc,
     0x28486068, 0x3f4f737c, 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254,
     0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244, 0x3585b1b4, 0x2b0b2328,
     0x25456164, 0x3acaf2f8, 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c,
     0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0, 0x31013130, 0x2acae2e8,
     0x2d4d616c, 0x1f4f535c, 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088,
     0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4, 0x22426260, 0x29092128,
     0x07070304, 0x33033330, 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178,
     0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298,
 };
 
 static const u32 SS1[256] = {
     0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2, 0xcc0fcfc3, 0xdc1eced2,
     0xb03383b3, 0xb83888b0, 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3,
     0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53, 0xc003c3c3, 0x60224262,
     0x30330333, 0xb43585b1, 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3,
     0xd013c3d3, 0x90118191, 0x10110111, 0x04060602, 0x1c1c0c10, 0xbc3c8cb0,
     0x34360632, 0x480b4b43, 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0,
     0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0, 0xc002c2c2, 0x44054541,
     0xe021c1e1, 0xd416c6d2, 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890,
     0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32, 0xa42585a1, 0xf839c9f1,
     0x0c0d0d01, 0xdc1fcfd3, 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72,
     0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272, 0x40024242, 0xd414c4d0,
     0x40014141, 0xc000c0c0, 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83,
     0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13, 0xc80acac2, 0x2c2c0c20,
     0xa82a8aa2, 0x34340430, 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1,
     0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0, 0x54174753, 0xac2e8ea2,
     0x08080800, 0xc405c5c1, 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1,
     0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131, 0xf435c5f1, 0x880a8a82,
     0x682a4a62, 0xb03181b1, 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202,
     0x20220222, 0x04040400, 0x68284860, 0x70314171, 0x04070703, 0xd81bcbd3,
     0x9c1d8d91, 0x98198991, 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951,
     0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0, 0x981a8a92, 0xa02383a3,
     0xa82b8ba3, 0xd010c0d0, 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12,
     0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3, 0x94168692, 0x783b4b73,
     0x5c1c4c50, 0xa02282a2, 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41,
     0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32, 0x0c0c0c00, 0x2c2e0e22,
     0xb83a8ab2, 0x6c2e4e62, 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292,
     0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0, 0x14150511, 0xf83bcbf3,
     0x70304070, 0x74354571, 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303,
     0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470, 0xd415c5d1, 0xb43484b0,
     0xe82acae2, 0x08090901, 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040,
     0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501, 0xf83acaf2, 0x00010101,
     0xf030c0f0, 0x282a0a22, 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343,
     0x84058581, 0x14140410, 0x88098981, 0x981b8b93, 0xb03080b0, 0xe425c5e1,
     0x48084840, 0x78394971, 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282,
     0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53, 0x74374773, 0x54144450,
     0xb03282b2, 0x1c1d0d11, 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642,
     0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3, 0x7c3e4e72, 0xd81acad2,
     0xc809c9c1, 0xfc3dcdf1, 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30,
     0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70, 0x0c0e0e02, 0x50104050,
     0x38390931, 0x24260622, 0x30320232, 0x84048480, 0x68294961, 0x90138393,
     0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0, 0xc80bcbc3, 0x50134353,
     0x080a0a02, 0x84078783, 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83,
     0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3,
 };
 
 static const u32 SS2[256] = {
     0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3, 0x50541444, 0x111c1d0d,
     0xa0ac2c8c, 0x21242505, 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e,
     0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343, 0x20282808, 0x40440444,
     0x20202000, 0x919c1d8d, 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707,
     0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b, 0xb3b83b8b, 0x13101303,
     0xd2d012c2, 0xe2ec2ece, 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888,
     0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444, 0xe0ec2ccc, 0x91941585,
     0x03080b0b, 0x53541747, 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101,
     0x20242404, 0x101c1c0c, 0x73703343, 0x90981888, 0x10101000, 0xc0cc0ccc,
     0xf2f032c2, 0xd1d819c9, 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383,
     0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9, 0x60602040, 0x50501040,
     0xa3a02383, 0xe3e82bcb, 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f,
     0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848, 0xa2a42686, 0x12101202,
     0xa3ac2f8f, 0xd1d415c5, 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141,
     0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808, 0x131c1f0f, 0x91981989,
     0x00000000, 0x11181909, 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1,
     0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707, 0xb0b03080, 0x83880b8b,
     0x020c0e0e, 0xa3a82b8b, 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d,
     0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a, 0xb3bc3f8f, 0xe3ec2fcf,
     0xf3f033c3, 0xc1c405c5, 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444,
     0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a, 0x02040606, 0x21202101,
     0x63682b4b, 0x62642646, 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a,
     0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0, 0x72783a4a, 0x43440747,
     0x92941686, 0xe1e425c5, 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf,
     0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e, 0x32343606, 0x11141505,
     0x22202202, 0x30383808, 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c,
     0x81800181, 0xe1e829c9, 0x80840484, 0x93941787, 0x31343505, 0xc3c80bcb,
     0xc2cc0ece, 0x303c3c0c, 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989,
     0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8, 0x90941484, 0x51581949,
     0x82800282, 0xc0c404c4, 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747,
     0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888, 0x030c0f0f, 0x828c0e8e,
     0x42400242, 0x23202303, 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484,
     0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2, 0x636c2f4f, 0x313c3d0d,
     0x212c2d0d, 0x40400040, 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1,
     0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545, 0x33383b0b, 0xd0dc1ccc,
     0x60682848, 0x737c3f4f, 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646,
     0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646, 0xb1b43585, 0x23282b0b,
     0x61642545, 0xf2f83aca, 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f,
     0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282, 0x31303101, 0xe2e82aca,
     0x616c2d4d, 0x535c1f4f, 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888,
     0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4, 0x62602242, 0x21282909,
     0x03040707, 0x33303303, 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949,
     0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a,
 };
 
 static const u32 SS3[256] = {
     0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426, 0xcfc3cc0f, 0xced2dc1e,
     0x83b3b033, 0x88b0b838, 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407,
     0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b, 0xc3c3c003, 0x42626022,
     0x03333033, 0x85b1b435, 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427,
     0xc3d3d013, 0x81919011, 0x01111011, 0x06020406, 0x0c101c1c, 0x8cb0bc3c,
     0x06323436, 0x4b43480b, 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828,
     0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434, 0xc2c2c002, 0x45414405,
     0xc1e1e021, 0xc6d2d416, 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818,
     0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e, 0x85a1a425, 0xc9f1f839,
     0x0d010c0d, 0xcfd3dc1f, 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a,
     0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032, 0x42424002, 0xc4d0d414,
     0x41414001, 0xc0c0c000, 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b,
     0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f, 0xcac2c80a, 0x0c202c2c,
     0x8aa2a82a, 0x04303434, 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829,
     0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838, 0x47535417, 0x8ea2ac2e,
     0x08000808, 0xc5c1c405, 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839,
     0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031, 0xc5f1f435, 0x8a82880a,
     0x4a62682a, 0x81b1b031, 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002,
     0x02222022, 0x04000404, 0x48606828, 0x41717031, 0x07030407, 0xcbd3d81b,
     0x8d919c1d, 0x89919819, 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819,
     0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c, 0x8a92981a, 0x83a3a023,
     0x8ba3a82b, 0xc0d0d010, 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a,
     0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f, 0x86929416, 0x4b73783b,
     0x4c505c1c, 0x82a2a022, 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d,
     0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a, 0x0c000c0c, 0x0e222c2e,
     0x8ab2b83a, 0x4e626c2e, 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012,
     0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c, 0x05111415, 0xcbf3f83b,
     0x40707030, 0x45717435, 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003,
     0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434, 0xc5d1d415, 0x84b0b434,
     0xcae2e82a, 0x09010809, 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000,
     0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405, 0xcaf2f83a, 0x01010001,
     0xc0f0f030, 0x0a22282a, 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003,
     0x85818405, 0x04101414, 0x89818809, 0x8b93981b, 0x80b0b030, 0xc5e1e425,
     0x48404808, 0x49717839, 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002,
     0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f, 0x47737437, 0x44505414,
     0x82b2b032, 0x0d111c1d, 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406,
     0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b, 0x4e727c3e, 0xcad2d81a,
     0xc9c1c809, 0xcdf1fc3d, 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c,
     0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c, 0x0e020c0e, 0x40505010,
     0x09313839, 0x06222426, 0x02323032, 0x84808404, 0x49616829, 0x83939013,
     0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424, 0xcbc3c80b, 0x43535013,
     0x0a02080a, 0x87838407, 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f,
     0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437,
 };
 
 /* Key schedule constants, one per do_setkey iteration.  Each entry is
  * the previous one rotated left by one bit, starting from 0x9e3779b9
  * (e.g. 0x9e3779b9 -> 0x3c6ef373 -> 0x78dde6e6).  */
 static const u32 KC[NUMKC] = {
     0x9e3779b9, 0x3c6ef373, 0x78dde6e6, 0xf1bbcdcc,
     0xe3779b99, 0xc6ef3733, 0x8dde6e67, 0x1bbcdccf,
     0x3779b99e, 0x6ef3733c, 0xdde6e678, 0xbbcdccf1,
     0x779b99e3, 0xef3733c6, 0xde6e678d, 0xbcdccf1b,
 };
 
 
 
 /* Perform the key setup.
  *
  * On the very first call the selftest is run once; if it failed, this
  * and every later call returns GPG_ERR_SELFTEST_FAILED.  KEYLEN must be
  * 16, otherwise GPG_ERR_INV_KEYLEN is returned.  On success the 32 word
  * key schedule of CTX is filled in and 0 is returned.
  */
 static gcry_err_code_t
 do_setkey (SEED_context *ctx, const byte *key, const unsigned keylen)
 {
   static int initialized = 0;
   static const char *selftest_failed=0;
   u32 a, b, c, d;
   union wordbuf u0, u1;
   u32 *ks = ctx->keyschedule;
   int round;
 
   if (!initialized)
     {
       initialized = 1;
       selftest_failed = selftest ();
       if (selftest_failed)
         log_error ("%s\n", selftest_failed);
     }
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
   if (keylen != 16)
     return GPG_ERR_INV_KEYLEN;
 
   /* The key is handled as four big-endian words A || B || C || D.  */
   a = GETU32 (key + 0);
   b = GETU32 (key + 4);
   c = GETU32 (key + 8);
   d = GETU32 (key + 12);
 
   for (round = 0; round < NUMKC; round++)
     {
       /* Derive the two subkey words of this round.  */
       u0.w = a + c - KC[round];
       u1.w = b + KC[round] - d;
       ks[2 * round]     = (SS0[u0.b0] ^ SS1[u0.b1]
                            ^ SS2[u0.b2] ^ SS3[u0.b3]);
       ks[2 * round + 1] = (SS0[u1.b0] ^ SS1[u1.b1]
                            ^ SS2[u1.b2] ^ SS3[u1.b3]);
 
       if (round & 1)
         {
           /* Odd round: rotate the 64 bit value C || D left by 8 bits.  */
           u0.w = c;
           c = (c << 8) ^ (d >> 24);
           d = (d << 8) ^ (u0.w >> 24);
         }
       else
         {
           /* Even round: rotate the 64 bit value A || B right by 8 bits.  */
           u0.w = a;
           a = (a >> 8) ^ (b << 24);
           b = (b >> 8) ^ (u0.w << 24);
         }
     }
 
   return 0;
 }
 
 /* Setkey entry point referenced from the cipher spec.  CONTEXT is a
  * SEED_context; KEY and KEYLEN are handed to do_setkey, whose result is
  * returned after the stack has been burned.  BULK_OPS is unused.  */
 static gcry_err_code_t
 seed_setkey (void *context, const byte *key, const unsigned keylen,
              cipher_bulk_ops_t *bulk_ops)
 {
   int err;
 
   (void)bulk_ops;
 
   err = do_setkey ((SEED_context *) context, key, keylen);
   _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2);
   return err;
 }
 
 
 
 /* One SEED round.  The pair (X3, X4) is mixed with the key schedule
  * words at index RBASE and RBASE+1, passed three times through the
  * SS0..SS3 table lookups with 32 bit additions in between, and the two
  * resulting words are XORed into X1 and X2.  X3 and X4 are only read.
  *
  * The macro expands to a plain statement sequence (not a do/while
  * block) and relies on the caller having `ctx' and the union wordbuf
  * temporaries `t0' and `t1' in scope.  */
 #define OP(X1, X2, X3, X4, rbase)				\
     t0.w = X3 ^ ctx->keyschedule[rbase];			\
     t1.w = X4 ^ ctx->keyschedule[rbase+1];			\
     t1.w ^= t0.w;						\
     t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3];	\
     t0.w += t1.w;						\
     t0.w = SS0[t0.b0] ^ SS1[t0.b1] ^ SS2[t0.b2] ^ SS3[t0.b3];	\
     t1.w += t0.w;						\
     t1.w = SS0[t1.b0] ^ SS1[t1.b1] ^ SS2[t1.b2] ^ SS3[t1.b3];	\
     t0.w += t1.w;						\
     X1 ^= t0.w;							\
     X2 ^= t1.w;
 
 /* Encrypt one block.  inbuf and outbuf may be the same.
  *
  * The block is read as four big-endian words.  The 16 rounds are run as
  * 8 pairs with the roles of the two halves swapped inside each pair;
  * a pair starting at key schedule index K uses the words K .. K+3.
  * The halves are written back in swapped order.  */
 static void
 do_encrypt (const SEED_context *ctx, byte *outbuf, const byte *inbuf)
 {
   union wordbuf t0, t1;   /* Scratch words required by OP().  */
   u32 l0, l1, r0, r1;
 
 #define SEED_ENC_ROUND_PAIR(k)          \
     OP (l0, l1, r0, r1, (k));           \
     OP (r0, r1, l0, l1, (k) + 2)
 
   l0 = GETU32 (inbuf + 0);
   l1 = GETU32 (inbuf + 4);
   r0 = GETU32 (inbuf + 8);
   r1 = GETU32 (inbuf + 12);
 
   SEED_ENC_ROUND_PAIR (0);
   SEED_ENC_ROUND_PAIR (4);
   SEED_ENC_ROUND_PAIR (8);
   SEED_ENC_ROUND_PAIR (12);
   SEED_ENC_ROUND_PAIR (16);
   SEED_ENC_ROUND_PAIR (20);
   SEED_ENC_ROUND_PAIR (24);
   SEED_ENC_ROUND_PAIR (28);
 
 #undef SEED_ENC_ROUND_PAIR
 
   PUTU32 (outbuf + 0, r0);
   PUTU32 (outbuf + 4, r1);
   PUTU32 (outbuf + 8, l0);
   PUTU32 (outbuf + 12, l1);
 }
 
 /* Encrypt entry point referenced from the cipher spec: encrypt the
  * block at INBUF into OUTBUF with the SEED_context CONTEXT.  The return
  * value is the number of stack bytes the caller should burn.  */
 static unsigned int
 seed_encrypt (void *context, byte *outbuf, const byte *inbuf)
 {
   do_encrypt ((SEED_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (4*6);
 }
 
 
 
 /* Decrypt one block.  inbuf and outbuf may be the same.
  *
  * Same structure as do_encrypt, but the key schedule is walked
  * backwards: a round pair given index K uses the words K, K+1 first and
  * K-2, K-1 second, with K running from 30 down to 2.  */
 static void
 do_decrypt (SEED_context *ctx, byte *outbuf, const byte *inbuf)
 {
   union wordbuf t0, t1;   /* Scratch words required by OP().  */
   u32 l0, l1, r0, r1;
 
 #define SEED_DEC_ROUND_PAIR(k)          \
     OP (l0, l1, r0, r1, (k));           \
     OP (r0, r1, l0, l1, (k) - 2)
 
   l0 = GETU32 (inbuf + 0);
   l1 = GETU32 (inbuf + 4);
   r0 = GETU32 (inbuf + 8);
   r1 = GETU32 (inbuf + 12);
 
   SEED_DEC_ROUND_PAIR (30);
   SEED_DEC_ROUND_PAIR (26);
   SEED_DEC_ROUND_PAIR (22);
   SEED_DEC_ROUND_PAIR (18);
   SEED_DEC_ROUND_PAIR (14);
   SEED_DEC_ROUND_PAIR (10);
   SEED_DEC_ROUND_PAIR (6);
   SEED_DEC_ROUND_PAIR (2);
 
 #undef SEED_DEC_ROUND_PAIR
 
   PUTU32 (outbuf + 0, r0);
   PUTU32 (outbuf + 4, r1);
   PUTU32 (outbuf + 8, l0);
   PUTU32 (outbuf + 12, l1);
 }
 
 /* Decrypt entry point referenced from the cipher spec: decrypt the
  * block at INBUF into OUTBUF with the SEED_context CONTEXT.  The return
  * value is the number of stack bytes the caller should burn.  */
 static unsigned int
 seed_decrypt (void *context, byte *outbuf, const byte *inbuf)
 {
   do_decrypt ((SEED_context *) context, outbuf, inbuf);
 
   return /*burn_stack*/ (4*6);
 }
 
 
 /* Known-answer test with a single 128 bit key: encrypt one block and
  * compare it with the expected ciphertext, then decrypt the result in
  * place and compare it with the original plaintext.  Returns NULL on
  * success or a static error string describing the failing step.  */
 static const char*
 selftest (void)
 {
   /* The test vector is taken from the appendix section B.3 of RFC4269.
    */
   static const byte key[16] = {
     0x47, 0x06, 0x48, 0x08, 0x51, 0xE6, 0x1B, 0xE8,
     0x5D, 0x74, 0xBF, 0xB3, 0xFD, 0x95, 0x61, 0x85
   };
   static const byte plaintext[16] = {
     0x83, 0xA2, 0xF8, 0xA2, 0x88, 0x64, 0x1F, 0xB9,
     0xA4, 0xE9, 0xA5, 0xCC, 0x2F, 0x13, 0x1C, 0x7D
   };
   static const byte ciphertext[16] = {
     0xEE, 0x54, 0xD1, 0x3E, 0xBC, 0xAE, 0x70, 0x6D,
     0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A,
   };
   SEED_context ctx;
   byte scratch[16];
 
   seed_setkey (&ctx, key, sizeof(key), NULL);
 
   seed_encrypt (&ctx, scratch, plaintext);
   if (memcmp (scratch, ciphertext, sizeof (ciphertext)) != 0)
     return "SEED test encryption failed.";
 
   /* Decrypt in place; inbuf and outbuf may be the same.  */
   seed_decrypt (&ctx, scratch, scratch);
   if (memcmp (scratch, plaintext, sizeof (plaintext)) != 0)
     return "SEED test decryption failed.";
 
   return NULL;
 }
 
 
 
 /* Object identifiers for SEED, each paired with the cipher mode it
  * selects (ECB, CBC, CFB, OFB).  The list is terminated by an entry
  * whose OID string is NULL.  */
 static const gcry_cipher_oid_spec_t seed_oids[] =
   {
     { "1.2.410.200004.1.3", GCRY_CIPHER_MODE_ECB },
     { "1.2.410.200004.1.4", GCRY_CIPHER_MODE_CBC },
     { "1.2.410.200004.1.5", GCRY_CIPHER_MODE_CFB },
     { "1.2.410.200004.1.6", GCRY_CIPHER_MODE_OFB },
     { NULL }
   };
 
 /* Cipher descriptor for SEED, exported for use outside this file.  The
  * positional initializers supply the algorithm id GCRY_CIPHER_SEED and
  * a {0, 0} member, the name "SEED", a NULL slot, the OID table, the
  * values 16 and 128, the context size and the setkey / encrypt /
  * decrypt callbacks; all remaining members are zero-initialized.
  * NOTE(review): 16 and 128 look like the block size in bytes and the
  * key length in bits (do_setkey accepts only 16 byte keys), and the
  * NULL slot is presumably an alias list -- confirm against the
  * declaration of gcry_cipher_spec_t.  */
 gcry_cipher_spec_t _gcry_cipher_spec_seed =
   {
     GCRY_CIPHER_SEED, {0, 0},
     "SEED", NULL, seed_oids, 16, 128, sizeof (SEED_context),
     seed_setkey, seed_encrypt, seed_decrypt,
   };
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 8fa47c7c..908523c2 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1,1837 +1,1836 @@
 /* serpent.c - Implementation of the Serpent encryption algorithm.
  *	Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 
 #include <string.h>
 #include <stdio.h>
 
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
 #include "bithelp.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "bulkhelp.h"
 
 
 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code.  It is
    enabled on x86-64 when the assembler is compatible with either the
    AMD64 or the Win64 platform conventions.  */
 #undef USE_SSE2
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSE2 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code.  Same
    platform conditions as USE_SSE2, and additionally requires
    ENABLE_AVX2_SUPPORT.  */
 #undef USE_AVX2
 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # if defined(ENABLE_AVX2_SUPPORT)
 #  define USE_AVX2 1
 # endif
 #endif
 
 /* USE_NEON indicates whether to enable ARM NEON assembly code.  It
    requires ENABLE_NEON_SUPPORT, an ARMv6 or later little-endian
    (__ARMEL__) target, a compatible ARM assembler and NEON inline
    assembly support.  */
 #undef USE_NEON
 #ifdef ENABLE_NEON_SUPPORT
 # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
      && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
      && defined(HAVE_GCC_INLINE_ASM_NEON)
 #  define USE_NEON 1
 # endif
 #endif /*ENABLE_NEON_SUPPORT*/
 
 /* Number of rounds per Serpent encrypt/decrypt operation.  */
 #define ROUNDS 32
 
 /* Magic number, used while generating the subkeys.  */
 #define PHI 0x9E3779B9
 
 /* Serpent works on 128 bit blocks, held as four 32 bit words.  */
 typedef u32 serpent_block_t[4];
 
 /* Serpent key, provided by the user.  If the original key is shorter
    than 256 bits, it is padded.  */
 typedef u32 serpent_key_t[8];
 
 /* The key schedule consists of 33 (ROUNDS + 1) 128 bit subkeys, each
    held as four 32 bit words.  */
 typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
 
 /* A Serpent context.  The use_avx2 / use_neon members exist only when
    the corresponding implementation is compiled in.
    NOTE(review): presumably they are set at key setup from the detected
    hardware features -- confirm in the setkey code.  */
 typedef struct serpent_context
 {
   serpent_subkeys_t keys;	/* Generated subkeys.  */
 
 #ifdef USE_AVX2
   int use_avx2;
 #endif
 #ifdef USE_NEON
   int use_neon;
 #endif
 } serpent_context_t;
 
 
 /* The assembly implementations use the SystemV ABI.  On Win64 this
  * requires an ABI conversion and additional stack space to store
  * XMM6-XMM15, so the prototypes are tagged with the sysv_abi attribute
  * there; on other platforms ASM_FUNC_ABI expands to nothing.  The macro
  * is only defined when the SSE2 or AVX2 code is compiled in. */
 #undef ASM_FUNC_ABI
 #if defined(USE_SSE2) || defined(USE_AVX2)
 # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
 #  define ASM_FUNC_ABI __attribute__((sysv_abi))
 # else
 #  define ASM_FUNC_ABI
 # endif
 #endif
 
 
 #ifdef USE_SSE2
 /* Assembler implementations of Serpent using SSE2.  Process 8 blocks in
    parallel.  The functions are defined outside this file; all of them
    take the Serpent context first and are declared with ASM_FUNC_ABI.
    NOTE(review): the Ls argument of the OCB entry points is an array of
    eight u64 values, presumably addresses of OCB offset table entries
    stored as integers -- confirm against the callers and the assembly.
  */
 extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[8]) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[8]) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
 					const unsigned char *abuf,
 					unsigned char *offset,
 					unsigned char *checksum,
 					const u64 Ls[8]) ASM_FUNC_ABI;
 
 /* Generic 8 block primitive; ENCRYPT selects the direction.  */
 extern void _gcry_serpent_sse2_blk8(const serpent_context_t *c, byte *out,
 				    const byte *in, int encrypt) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 /* Assembler implementations of Serpent using AVX2.  Process 16 blocks in
    parallel.  The functions are defined outside this file; all of them
    take the Serpent context first and are declared with ASM_FUNC_ABI.
    NOTE(review): the Ls argument of the OCB entry points is an array of
    sixteen u64 values, presumably addresses of OCB offset table entries
    stored as integers -- confirm against the callers and the assembly.
  */
 extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
 					const unsigned char *abuf,
 					unsigned char *offset,
 					unsigned char *checksum,
 					const u64 Ls[16]) ASM_FUNC_ABI;
 
 /* Generic 16 block primitive; ENCRYPT selects the direction.  */
 extern void _gcry_serpent_avx2_blk16(const serpent_context_t *c, byte *out,
 				     const byte *in, int encrypt) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_NEON
 /* Assembler implementations of Serpent using ARM NEON.  Process 8 blocks
    in parallel.  The functions are defined outside this file and, unlike
    the x86-64 variants, carry no ABI attribute.  Here the Ls argument of
    the OCB entry points is an array of eight pointers.
  */
 extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *ctr);
 
 extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv);
 
 extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv);
 
 extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const void *Ls[8]);
 
 extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const void *Ls[8]);
 
 extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
 					const unsigned char *abuf,
 					unsigned char *offset,
 					unsigned char *checksum,
 					const void *Ls[8]);
 
 /* Generic 8 block primitive; ENCRYPT selects the direction.  */
 extern void _gcry_serpent_neon_blk8(const serpent_context_t *c, byte *out,
 				    const byte *in, int encrypt);
 #endif
 
 
 /* Prototypes of file-local functions defined further down.  */
 
 /* Selftest; returns a const string.  NOTE(review): presumably NULL on
    success and an error text otherwise -- confirm at the definition.  */
 static const char *serpent_test (void);
 
 /* Bulk mode handlers.  The CTR/CBC/CFB/XTS/ECB variants take the cipher
    context plus output and input buffers and a block count; the OCB
    variants take the cipher handle and return a size_t.  */
 static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
 				   void *outbuf_arg, const void *inbuf_arg,
 				   size_t nblocks);
 static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv,
 				   void *outbuf_arg, const void *inbuf_arg,
 				   size_t nblocks);
 static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv,
 				   void *outbuf_arg, const void *inbuf_arg,
 				   size_t nblocks);
 static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 				       const void *inbuf_arg, size_t nblocks,
 				       int encrypt);
 static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 				      size_t nblocks);
 static void _gcry_serpent_xts_crypt (void *context, unsigned char *tweak,
 				     void *outbuf_arg, const void *inbuf_arg,
 				     size_t nblocks, int encrypt);
 static void _gcry_serpent_ecb_crypt (void *context, void *outbuf_arg,
 				     const void *inbuf_arg, size_t nblocks,
 				     int encrypt);
 
 
 /*
  * These are the S-Boxes of Serpent from following research paper.
  *
  *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
  *   (New York, New York, USA), p. 317–329, National Institute of Standards and
  *   Technology, 2000.
  *
  * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
  *
  */
 
 /* S-box 0, written purely as bitwise operations on 32 bit words.
  * r0..r3 are the input words and are overwritten as scratch (they must
  * be modifiable lvalues); w, x, y, z receive the four output words.
  * The temporary r4 is local to the macro's own brace block.  */
 #define SBOX0(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r3 ^= r0; r4 =  r1; \
     r1 &= r3; r4 ^= r2; \
     r1 ^= r0; r0 |= r3; \
     r0 ^= r4; r4 ^= r3; \
     r3 ^= r2; r2 |= r1; \
     r2 ^= r4; r4 = ~r4; \
     r4 |= r1; r1 ^= r3; \
     r1 ^= r4; r3 |= r0; \
     r1 ^= r3; r4 ^= r3; \
     \
     w = r1; x = r4; y = r2; z = r0; \
   }
 
 /* Inverse of S-box 0 (per the macro name), as bitwise operations on
  * 32 bit words.  r0..r3 are the input words and are overwritten as
  * scratch; w, x, y, z receive the four output words.  The temporary r4
  * is local to the macro's own brace block.  */
 #define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r2 = ~r2; r4 =  r1; \
     r1 |= r0; r4 = ~r4; \
     r1 ^= r2; r2 |= r4; \
     r1 ^= r3; r0 ^= r4; \
     r2 ^= r0; r0 &= r3; \
     r4 ^= r0; r0 |= r1; \
     r0 ^= r2; r3 ^= r4; \
     r2 ^= r1; r3 ^= r0; \
     r3 ^= r1; \
     r2 &= r3; \
     r4 ^= r2; \
     \
     w = r0; x = r4; y = r1; z = r3; \
   }
 
 /* S-box 1, written purely as bitwise operations on 32 bit words.
  * r0..r3 are the input words and are overwritten as scratch; w, x, y, z
  * receive the four output words.  The temporary r4 is local to the
  * macro's own brace block.  */
 #define SBOX1(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r0 = ~r0; r2 = ~r2; \
     r4 =  r0; r0 &= r1; \
     r2 ^= r0; r0 |= r3; \
     r3 ^= r2; r1 ^= r0; \
     r0 ^= r4; r4 |= r1; \
     r1 ^= r3; r2 |= r0; \
     r2 &= r4; r0 ^= r1; \
     r1 &= r2; \
     r1 ^= r0; r0 &= r2; \
     r0 ^= r4; \
     \
     w = r2; x = r0; y = r3; z = r1; \
   }
 
 /* Inverse of S-box 1 (per the macro name), as bitwise operations on
  * 32 bit words.  r0..r3 are the input words and are overwritten as
  * scratch; w, x, y, z receive the four output words.  The temporary r4
  * is local to the macro's own brace block.  */
 #define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r1; r1 ^= r3; \
     r3 &= r1; r4 ^= r2; \
     r3 ^= r0; r0 |= r1; \
     r2 ^= r3; r0 ^= r4; \
     r0 |= r2; r1 ^= r3; \
     r0 ^= r1; r1 |= r3; \
     r1 ^= r0; r4 = ~r4; \
     r4 ^= r1; r1 |= r0; \
     r1 ^= r0; \
     r1 |= r4; \
     r3 ^= r1; \
     \
     w = r4; x = r0; y = r3; z = r2; \
   }
 
 /* S-box 2, written purely as bitwise operations on 32 bit words.
  * r0..r3 are the input words and are overwritten as scratch; w, x, y, z
  * receive the four output words.  The temporary r4 is local to the
  * macro's own brace block.  */
 #define SBOX2(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r0; r0 &= r2; \
     r0 ^= r3; r2 ^= r1; \
     r2 ^= r0; r3 |= r4; \
     r3 ^= r1; r4 ^= r2; \
     r1 =  r3; r3 |= r4; \
     r3 ^= r0; r0 &= r1; \
     r4 ^= r0; r1 ^= r3; \
     r1 ^= r4; r4 = ~r4; \
     \
     w = r2; x = r3; y = r1; z = r4; \
   }
 
 /* Inverse of Serpent S-box 2, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r2 ^= r3; r3 ^= r0; \
     r4 =  r3; r3 &= r2; \
     r3 ^= r1; r1 |= r2; \
     r1 ^= r4; r4 &= r3; \
     r2 ^= r3; r4 &= r0; \
     r4 ^= r2; r2 &= r1; \
     r2 |= r0; r3 = ~r3; \
     r2 ^= r3; r0 ^= r3; \
     r0 &= r1; r3 ^= r4; \
     r3 ^= r0; \
     \
     w = r1; x = r4; y = r2; z = r3; \
   }
 
 /* Serpent S-box 3, written as a straight-line sequence of bitwise
    operations.  The input words R0..R3 are modified in place (R4 is a
    local u32 temporary), so the caller's inputs are clobbered; the four
    result words are assigned to W, X, Y and Z.  Arguments are evaluated
    several times and must be side-effect-free lvalues.  */
 #define SBOX3(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r0; r0 |= r3; \
     r3 ^= r1; r1 &= r4; \
     r4 ^= r2; r2 ^= r3; \
     r3 &= r0; r4 |= r1; \
     r3 ^= r4; r0 ^= r1; \
     r4 &= r0; r1 ^= r3; \
     r4 ^= r2; r1 |= r0; \
     r1 ^= r2; r0 ^= r3; \
     r2 =  r1; r1 |= r3; \
     r1 ^= r0; \
     \
     w = r1; x = r2; y = r3; z = r4; \
   }
 
 /* Inverse of Serpent S-box 3, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r2; r2 ^= r1; \
     r0 ^= r2; r4 &= r2; \
     r4 ^= r0; r0 &= r1; \
     r1 ^= r3; r3 |= r4; \
     r2 ^= r3; r0 ^= r3; \
     r1 ^= r4; r3 &= r2; \
     r3 ^= r1; r1 ^= r0; \
     r1 |= r2; r0 ^= r3; \
     r1 ^= r4; \
     r0 ^= r1; \
     \
     w = r2; x = r1; y = r3; z = r0; \
   }
 
 /* Serpent S-box 4, written as a straight-line sequence of bitwise
    operations.  The input words R0..R3 are modified in place (R4 is a
    local u32 temporary), so the caller's inputs are clobbered; the four
    result words are assigned to W, X, Y and Z.  Arguments are evaluated
    several times and must be side-effect-free lvalues.  */
 #define SBOX4(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r1 ^= r3; r3 = ~r3; \
     r2 ^= r3; r3 ^= r0; \
     r4 =  r1; r1 &= r3; \
     r1 ^= r2; r4 ^= r3; \
     r0 ^= r4; r2 &= r4; \
     r2 ^= r0; r0 &= r1; \
     r3 ^= r0; r4 |= r1; \
     r4 ^= r0; r0 |= r3; \
     r0 ^= r2; r2 &= r3; \
     r0 = ~r0; r4 ^= r2; \
     \
     w = r1; x = r4; y = r0; z = r3; \
   }
 
 /* Inverse of Serpent S-box 4, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r2; r2 &= r3; \
     r2 ^= r1; r1 |= r3; \
     r1 &= r0; r4 ^= r2; \
     r4 ^= r1; r1 &= r2; \
     r0 = ~r0; r3 ^= r4; \
     r1 ^= r3; r3 &= r0; \
     r3 ^= r2; r0 ^= r1; \
     r2 &= r0; r3 ^= r0; \
     r2 ^= r4; \
     r2 |= r3; r3 ^= r0; \
     r2 ^= r1; \
     \
     w = r0; x = r3; y = r2; z = r4; \
   }
 
 /* Serpent S-box 5, written as a straight-line sequence of bitwise
    operations.  The input words R0..R3 are modified in place (R4 is a
    local u32 temporary), so the caller's inputs are clobbered; the four
    result words are assigned to W, X, Y and Z.  Arguments are evaluated
    several times and must be side-effect-free lvalues.  */
 #define SBOX5(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r0 ^= r1; r1 ^= r3; \
     r3 = ~r3; r4 =  r1; \
     r1 &= r0; r2 ^= r3; \
     r1 ^= r2; r2 |= r4; \
     r4 ^= r3; r3 &= r1; \
     r3 ^= r0; r4 ^= r1; \
     r4 ^= r2; r2 ^= r0; \
     r0 &= r3; r2 = ~r2; \
     r0 ^= r4; r4 |= r3; \
     r2 ^= r4; \
     \
     w = r1; x = r3; y = r0; z = r2; \
   }
 
 /* Inverse of Serpent S-box 5, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r1 = ~r1; r4 =  r3; \
     r2 ^= r1; r3 |= r0; \
     r3 ^= r2; r2 |= r1; \
     r2 &= r0; r4 ^= r3; \
     r2 ^= r4; r4 |= r0; \
     r4 ^= r1; r1 &= r2; \
     r1 ^= r3; r4 ^= r2; \
     r3 &= r4; r4 ^= r1; \
     r3 ^= r4; r4 = ~r4; \
     r3 ^= r0; \
     \
     w = r1; x = r4; y = r3; z = r2; \
   }
 
 /* Serpent S-box 6, written as a straight-line sequence of bitwise
    operations.  The input words R0..R3 are modified in place (R4 is a
    local u32 temporary), so the caller's inputs are clobbered; the four
    result words are assigned to W, X, Y and Z.  Arguments are evaluated
    several times and must be side-effect-free lvalues.  */
 #define SBOX6(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r2 = ~r2; r4 =  r3; \
     r3 &= r0; r0 ^= r4; \
     r3 ^= r2; r2 |= r4; \
     r1 ^= r3; r2 ^= r0; \
     r0 |= r1; r2 ^= r1; \
     r4 ^= r0; r0 |= r3; \
     r0 ^= r2; r4 ^= r3; \
     r4 ^= r0; r3 = ~r3; \
     r2 &= r4; \
     r2 ^= r3; \
     \
     w = r0; x = r1; y = r4; z = r2; \
   }
 
 /* Inverse of Serpent S-box 6, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r0 ^= r2; r4 =  r2; \
     r2 &= r0; r4 ^= r3; \
     r2 = ~r2; r3 ^= r1; \
     r2 ^= r3; r4 |= r0; \
     r0 ^= r2; r3 ^= r4; \
     r4 ^= r1; r1 &= r3; \
     r1 ^= r0; r0 ^= r3; \
     r0 |= r2; r3 ^= r1; \
     r4 ^= r0; \
     \
     w = r1; x = r2; y = r4; z = r3; \
   }
 
 /* Serpent S-box 7, written as a straight-line sequence of bitwise
    operations.  The input words R0..R3 are modified in place (R4 is a
    local u32 temporary), so the caller's inputs are clobbered; the four
    result words are assigned to W, X, Y and Z.  Arguments are evaluated
    several times and must be side-effect-free lvalues.  */
 #define SBOX7(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r1; r1 |= r2; \
     r1 ^= r3; r4 ^= r2; \
     r2 ^= r1; r3 |= r4; \
     r3 &= r0; r4 ^= r2; \
     r3 ^= r1; r1 |= r4; \
     r1 ^= r0; r0 |= r4; \
     r0 ^= r2; r1 ^= r4; \
     r2 ^= r1; r1 &= r0; \
     r1 ^= r4; r2 = ~r2; \
     r2 |= r0; \
     r4 ^= r2; \
     \
     w = r4; x = r3; y = r1; z = r0; \
   }
 
 /* Inverse of Serpent S-box 7, written as a straight-line sequence of
    bitwise operations.  The input words R0..R3 are modified in place
    (R4 is a local u32 temporary), so the caller's inputs are clobbered;
    the four result words are assigned to W, X, Y and Z.  Arguments are
    evaluated several times and must be side-effect-free lvalues.  */
 #define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
     u32 r4; \
     \
     r4 =  r2; r2 ^= r0; \
     r0 &= r3; r4 |= r3; \
     r2 = ~r2; r3 ^= r1; \
     r1 |= r0; r0 ^= r2; \
     r2 &= r4; r3 &= r4; \
     r1 ^= r2; r2 ^= r0; \
     r0 |= r2; r4 ^= r1; \
     r0 ^= r3; r3 ^= r4; \
     r4 |= r0; r3 ^= r2; \
     r4 ^= r2; \
     \
     w = r3; x = r0; y = r1; z = r4; \
   }
 
 /* XOR BLOCK1 into BLOCK0: each of the four words BLOCK0[0..3] is
    XORed with the word of BLOCK1 at the same index; BLOCK1 is only
    read.  Both arguments are expanded four times, so they must be
    expressions without side effects.  */
 #define BLOCK_XOR(block0, block1) \
   {                               \
     block0[0] ^= block1[0];       \
     block0[1] ^= block1[1];       \
     block0[2] ^= block1[2];       \
     block0[3] ^= block1[3];       \
   }
 
 /* Copy BLOCK_SRC to BLOCK_DST, word by word for the four words at
    indices 0..3.  Both arguments are expanded four times, so they must
    be expressions without side effects.  */
 #define BLOCK_COPY(block_dst, block_src) \
   {                                      \
     block_dst[0] = block_src[0];         \
     block_dst[1] = block_src[1];         \
     block_dst[2] = block_src[2];         \
     block_dst[3] = block_src[3];         \
   }
 
 /* Apply SBOX number WHICH to to the block found in ARRAY0, writing
    the output to the block found in ARRAY1.  */
 #define SBOX(which, array0, array1)                         \
   SBOX##which (array0[0], array0[1], array0[2], array0[3],  \
                array1[0], array1[1], array1[2], array1[3]);
 
 /* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
    the output to the block found in ARRAY1.  WHICH is pasted between
    `SBOX' and `_INVERSE', so it must be a literal digit naming one of
    the SBOXn_INVERSE macros.  The words of ARRAY0 are passed as the
    in-place working registers of that macro and are therefore
    clobbered.  Note that the expansion already ends in a semicolon.  */
 #define SBOX_INVERSE(which, array0, array1)                           \
   SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3],  \
                          array1[0], array1[1], array1[2], array1[3]);
 
 /* Apply the linear transformation to BLOCK, in place.  The four words
    of BLOCK are mixed with rotations (rol), plain left shifts and XORs;
    every step uses the values produced by the preceding steps, so the
    statement order is significant.  */
 #define LINEAR_TRANSFORMATION(block)                  \
   {                                                   \
     block[0] = rol (block[0], 13);                    \
     block[2] = rol (block[2], 3);                     \
     block[1] = block[1] ^ block[0] ^ block[2];        \
     block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
     block[1] = rol (block[1], 1);                     \
     block[3] = rol (block[3], 7);                     \
     block[0] = block[0] ^ block[1] ^ block[3];        \
     block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
     block[0] = rol (block[0], 5);                     \
     block[2] = rol (block[2], 22);                    \
   }
 
 /* Apply the inverse linear transformation to BLOCK, in place.  The
    steps of LINEAR_TRANSFORMATION are undone in reverse order, with
    each left rotation replaced by a right rotation (ror) of the same
    amount; the statement order is significant.  */
 #define LINEAR_TRANSFORMATION_INVERSE(block)          \
   {                                                   \
     block[2] = ror (block[2], 22);                    \
     block[0] = ror (block[0] , 5);                    \
     block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
     block[0] = block[0] ^ block[1] ^ block[3];        \
     block[3] = ror (block[3], 7);                     \
     block[1] = ror (block[1], 1);                     \
     block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
     block[1] = block[1] ^ block[0] ^ block[2];        \
     block[2] = ror (block[2], 3);                     \
     block[0] = ror (block[0], 13);                    \
   }
 
 /* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
    subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.
    The steps are: XOR SUBKEYS[round] into BLOCK, apply the S-box
    (BLOCK -> BLOCK_TMP), apply the linear transformation to BLOCK_TMP
    and copy BLOCK_TMP back to BLOCK.  This macro requires a variable
    named `round' to be in scope where it is used and increments it
    once.  */
 #define ROUND(which, subkeys, block, block_tmp) \
   {                                             \
     BLOCK_XOR (block, subkeys[round]);          \
     round++;                                    \
     SBOX (which, block, block_tmp);             \
     LINEAR_TRANSFORMATION (block_tmp);          \
     BLOCK_COPY (block, block_tmp);              \
   }
 
 /* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
    and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
    storage.  The result will be stored in BLOCK_TMP.  Unlike ROUND,
    no linear transformation is applied; instead a second subkey is
    XORed into the S-box output.  This macro requires a variable named
    `round' to be in scope and increments it twice, once per subkey
    consumed.  */
 #define ROUND_LAST(which, subkeys, block, block_tmp) \
   {                                                  \
     BLOCK_XOR (block, subkeys[round]);               \
     round++;                                         \
     SBOX (which, block, block_tmp);                  \
     BLOCK_XOR (block_tmp, subkeys[round]);           \
     round++;                                         \
   }
 
 /* Apply an inverse Serpent round to BLOCK, using the SBOX number
    WHICH and the subkeys contained in SUBKEY.  Use BLOCK_TMP as
    temporary storage.  The steps are: undo the linear transformation
    on BLOCK, apply the inverse S-box (BLOCK -> BLOCK_TMP), XOR
    SUBKEY[round] into BLOCK_TMP and copy BLOCK_TMP back to BLOCK, so
    the result is available in both.  This macro requires a variable
    named `round' to be in scope and decrements it once.  */
 #define ROUND_INVERSE(which, subkey, block, block_tmp) \
   {                                                    \
     LINEAR_TRANSFORMATION_INVERSE (block);             \
     SBOX_INVERSE (which, block, block_tmp);            \
     BLOCK_XOR (block_tmp, subkey[round]);              \
     round--;                                           \
     BLOCK_COPY (block, block_tmp);                     \
   }
 
 /* Apply the first inverse Serpent round to BLOCK (the one that undoes
    ROUND_LAST), using the SBOX number WHICH and the subkeys contained
    in SUBKEYS.  Use BLOCK_TMP as temporary storage.  The result will
    be stored in BLOCK_TMP.  No inverse linear transformation is
    applied; a subkey is XORed in both before and after the inverse
    S-box.  This macro requires a variable named `round' to be in scope
    and decrements it twice, once per subkey consumed.  */
 #define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
   {                                                           \
     BLOCK_XOR (block, subkeys[round]);                        \
     round--;                                                  \
     SBOX_INVERSE (which, block, block_tmp);                   \
     BLOCK_XOR (block_tmp, subkeys[round]);                    \
     round--;                                                  \
   }
 
 /* Convert the user provided key KEY of KEY_LENGTH bytes into the
    internally used format.

    KEY is read as a sequence of little-endian 32-bit words into
    KEY_PREPARED.  Only complete words are used: if KEY_LENGTH is not a
    multiple of four, the trailing bytes are ignored.  When fewer than
    eight words were supplied, the key is padded with a single word of
    value 1 followed by zero words up to index 7.

    The caller must ensure KEY_LENGTH is at most 32, otherwise more
    than eight words would be written to KEY_PREPARED.  */
 static void
 serpent_key_prepare (const byte *key, unsigned int key_length,
 		     serpent_key_t key_prepared)
 {
   /* Number of complete 32-bit words available in KEY.  */
   const unsigned int nwords = key_length / 4;
   /* Unsigned index so that the comparison with NWORDS does not mix
      signed and unsigned operands.  */
   unsigned int i;
 
   /* Copy key.  */
   for (i = 0; i < nwords; i++)
     key_prepared[i] = buf_get_le32 (key + i * 4);
 
   if (i < 8)
     {
       /* Key must be padded according to the Serpent
 	 specification.  */
       key_prepared[i] = 0x00000001;
 
       for (i++; i < 8; i++)
 	key_prepared[i] = 0;
     }
 }
 
 /* Derive the 33 subkeys from KEY and store them in SUBKEYS.
 
    KEY supplies the initial eight prekey words.  The prekey is then
    extended four words at a time, and every group of four new words is
    run through one S-box to yield one subkey (SUBKEYS[0] .. SUBKEYS[32]).
    All temporaries holding key material are wiped before returning.  */
 static void
 serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
 {
   u32 w[8];		/* The `prekey': window of the 8 most recent words.  */
   u32 ws[4];		/* Four new prekey words: first half of a step.  */
   u32 wt[4];		/* Four new prekey words: second half of a step.  */
 
   /* Initialize with key values.  */
   w[0] = key[0];
   w[1] = key[1];
   w[2] = key[2];
   w[3] = key[3];
   w[4] = key[4];
   w[5] = key[5];
   w[6] = key[6];
   w[7] = key[7];
 
   /* Expand to intermediate key using the affine recurrence.  */
   /* EXPAND_KEY4 computes the prekey words with indices R .. R+3.  W is
      used as a circular buffer indexed modulo 8, so for word index i
      the slot i%8 still holds word i-8 and is overwritten with
        rol (w[i-8] ^ w[i-5] ^ w[i-3] ^ w[i-1] ^ PHI ^ i, 11).
      The new words are additionally stored in WO because the S-box
      macros clobber their input.  */
 #define EXPAND_KEY4(wo, r)                                                     \
   wo[0] = w[(r+0)%8] =                                                         \
     rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
   wo[1] = w[(r+1)%8] =                                                         \
     rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
   wo[2] = w[(r+2)%8] =                                                         \
     rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
   wo[3] = w[(r+3)%8] =                                                         \
     rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
 
   /* EXPAND_KEY produces eight new prekey words starting at index R:
      the first four go to WS, the next four to WT.  */
 #define EXPAND_KEY(r)       \
   EXPAND_KEY4(ws, (r));     \
   EXPAND_KEY4(wt, (r + 4));
 
   /* Calculate subkeys via S-Boxes, in bitslice mode.  */
   /* The S-boxes are used in the repeating order 3, 2, 1, 0, 7, 6, 5, 4;
      each call consumes (and clobbers) WS or WT.  */
   EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
   EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
   EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
   EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
   EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
   EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
   EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
   EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
   EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
   EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
   EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
   EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
   EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
   EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
   EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
   EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
   /* The 33rd subkey needs only four more prekey words.  */
   EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
 
   /* Do not leave key-derived material on the stack.  */
   wipememory (ws, sizeof (ws));
   wipememory (wt, sizeof (wt));
   wipememory (w, sizeof (w));
 }
 
 /* Initialize CONTEXT with the key KEY of KEY_LENGTH bytes.
 
    Keys longer than 32 bytes are rejected with GPG_ERR_INV_KEYLEN.
    Otherwise the key is padded, the subkeys are derived into
    CONTEXT->keys, the hardware acceleration flags of CONTEXT are set
    from the detected hardware features (where support is compiled in),
    the padded key is wiped and 0 is returned.  */
 static gcry_err_code_t
 serpent_setkey_internal (serpent_context_t *context,
                          const byte *key, unsigned int key_length)
 {
   serpent_key_t padded_key;
 
   if (!(key_length <= 32))
     return GPG_ERR_INV_KEYLEN;
 
   serpent_key_prepare (key, key_length, padded_key);
   serpent_subkeys_generate (padded_key, context->keys);
 
 #ifdef USE_AVX2
   /* Use the AVX2 implementation only if the CPU reports AVX2.  */
   context->use_avx2 = (_gcry_get_hw_features () & HWF_INTEL_AVX2) ? 1 : 0;
 #endif
 
 #ifdef USE_NEON
   /* Use the NEON implementation only if the CPU reports NEON.  */
   context->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) ? 1 : 0;
 #endif
 
   /* The padded copy of the user key is no longer needed.  */
   wipememory (padded_key, sizeof(padded_key));
   return 0;
 }
 
 /* Initialize CTX with the key KEY of KEY_LENGTH bytes and fill in
    BULK_OPS with the Serpent bulk routines.
 
    On the first call the Serpent self-test is run and its result is
    remembered; if it failed, this and every later call returns
    GPG_ERR_SELFTEST_FAILED without setting a key.  Otherwise the result
    of serpent_setkey_internal is returned.  */
 static gcry_err_code_t
 serpent_setkey (void *ctx,
                 const byte *key, unsigned int key_length,
                 cipher_bulk_ops_t *bulk_ops)
 {
   static const char *selftest_error;
   static int selftest_done;
 
   /* Execute a self-test the first time Serpent is used.  */
   if (! selftest_done)
     {
       selftest_done = 1;
       selftest_error = serpent_test ();
       if (selftest_error)
         log_error ("Serpent test failure: %s\n", selftest_error);
     }
 
   /* Setup bulk encryption routines; entries not listed stay NULL.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
   bulk_ops->cbc_dec = _gcry_serpent_cbc_dec;
   bulk_ops->cfb_dec = _gcry_serpent_cfb_dec;
   bulk_ops->ctr_enc = _gcry_serpent_ctr_enc;
   bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt;
   bulk_ops->ocb_auth = _gcry_serpent_ocb_auth;
   bulk_ops->xts_crypt = _gcry_serpent_xts_crypt;
   bulk_ops->ecb_crypt = _gcry_serpent_ecb_crypt;
 
   if (selftest_error)
     return GPG_ERR_SELFTEST_FAILED;
 
   return serpent_setkey_internal ((serpent_context_t *) ctx, key, key_length);
 }
 
 /* Encrypt the 16-byte block at INPUT with the subkeys of CONTEXT and
    write the 16-byte result to OUTPUT.  The block is handled as four
    little-endian 32-bit words.  */
 static void
 serpent_encrypt_internal (serpent_context_t *context,
 			  const byte *input, byte *output)
 {
   serpent_block_t b, b_next;
   /* Subkey index; must be named `round' because the ROUND macros
      reference and advance it.  */
   int round = 0;
 
   b[0] = buf_get_le32 (input + 0);
   b[1] = buf_get_le32 (input + 4);
   b[2] = buf_get_le32 (input + 8);
   b[3] = buf_get_le32 (input + 12);
 
   /* 31 regular rounds, cycling through S-boxes 0..7.  */
   ROUND (0, context->keys, b, b_next);
   ROUND (1, context->keys, b, b_next);
   ROUND (2, context->keys, b, b_next);
   ROUND (3, context->keys, b, b_next);
   ROUND (4, context->keys, b, b_next);
   ROUND (5, context->keys, b, b_next);
   ROUND (6, context->keys, b, b_next);
   ROUND (7, context->keys, b, b_next);
   ROUND (0, context->keys, b, b_next);
   ROUND (1, context->keys, b, b_next);
   ROUND (2, context->keys, b, b_next);
   ROUND (3, context->keys, b, b_next);
   ROUND (4, context->keys, b, b_next);
   ROUND (5, context->keys, b, b_next);
   ROUND (6, context->keys, b, b_next);
   ROUND (7, context->keys, b, b_next);
   ROUND (0, context->keys, b, b_next);
   ROUND (1, context->keys, b, b_next);
   ROUND (2, context->keys, b, b_next);
   ROUND (3, context->keys, b, b_next);
   ROUND (4, context->keys, b, b_next);
   ROUND (5, context->keys, b, b_next);
   ROUND (6, context->keys, b, b_next);
   ROUND (7, context->keys, b, b_next);
   ROUND (0, context->keys, b, b_next);
   ROUND (1, context->keys, b, b_next);
   ROUND (2, context->keys, b, b_next);
   ROUND (3, context->keys, b, b_next);
   ROUND (4, context->keys, b, b_next);
   ROUND (5, context->keys, b, b_next);
   ROUND (6, context->keys, b, b_next);
 
   /* The final round consumes the last two subkeys (indices 31 and 32)
      and leaves its result in B_NEXT, not in B.  */
   ROUND_LAST (7, context->keys, b, b_next);
 
   buf_put_le32 (output + 0, b_next[0]);
   buf_put_le32 (output + 4, b_next[1]);
   buf_put_le32 (output + 8, b_next[2]);
   buf_put_le32 (output + 12, b_next[3]);
 }
 
 /* Decrypt the 16-byte block at INPUT with the subkeys of CONTEXT and
    write the 16-byte result to OUTPUT.  The block is handled as four
    little-endian 32-bit words and the rounds of the encryption are
    undone in reverse order.  */
 static void
 serpent_decrypt_internal (serpent_context_t *context,
 			  const byte *input, byte *output)
 {
   serpent_block_t b, b_next;
   /* Subkey index, counting down from the last subkey; must be named
      `round' because the ROUND_*INVERSE macros reference and decrement
      it.  NOTE(review): presumably ROUNDS is 32, the index of the 33rd
      subkey -- confirm against its definition.  */
   int round = ROUNDS;
 
   /* The ciphertext is loaded into B_NEXT because the first inverse
      round takes it as input and leaves its result in B.  */
   b_next[0] = buf_get_le32 (input + 0);
   b_next[1] = buf_get_le32 (input + 4);
   b_next[2] = buf_get_le32 (input + 8);
   b_next[3] = buf_get_le32 (input + 12);
 
   ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
 
   /* 31 regular inverse rounds, cycling through S-boxes 6..0, 7..0, ...
      Each leaves its result in both B and B_NEXT.  */
   ROUND_INVERSE (6, context->keys, b, b_next);
   ROUND_INVERSE (5, context->keys, b, b_next);
   ROUND_INVERSE (4, context->keys, b, b_next);
   ROUND_INVERSE (3, context->keys, b, b_next);
   ROUND_INVERSE (2, context->keys, b, b_next);
   ROUND_INVERSE (1, context->keys, b, b_next);
   ROUND_INVERSE (0, context->keys, b, b_next);
   ROUND_INVERSE (7, context->keys, b, b_next);
   ROUND_INVERSE (6, context->keys, b, b_next);
   ROUND_INVERSE (5, context->keys, b, b_next);
   ROUND_INVERSE (4, context->keys, b, b_next);
   ROUND_INVERSE (3, context->keys, b, b_next);
   ROUND_INVERSE (2, context->keys, b, b_next);
   ROUND_INVERSE (1, context->keys, b, b_next);
   ROUND_INVERSE (0, context->keys, b, b_next);
   ROUND_INVERSE (7, context->keys, b, b_next);
   ROUND_INVERSE (6, context->keys, b, b_next);
   ROUND_INVERSE (5, context->keys, b, b_next);
   ROUND_INVERSE (4, context->keys, b, b_next);
   ROUND_INVERSE (3, context->keys, b, b_next);
   ROUND_INVERSE (2, context->keys, b, b_next);
   ROUND_INVERSE (1, context->keys, b, b_next);
   ROUND_INVERSE (0, context->keys, b, b_next);
   ROUND_INVERSE (7, context->keys, b, b_next);
   ROUND_INVERSE (6, context->keys, b, b_next);
   ROUND_INVERSE (5, context->keys, b, b_next);
   ROUND_INVERSE (4, context->keys, b, b_next);
   ROUND_INVERSE (3, context->keys, b, b_next);
   ROUND_INVERSE (2, context->keys, b, b_next);
   ROUND_INVERSE (1, context->keys, b, b_next);
   ROUND_INVERSE (0, context->keys, b, b_next);
 
   buf_put_le32 (output + 0, b_next[0]);
   buf_put_le32 (output + 4, b_next[1]);
   buf_put_le32 (output + 8, b_next[2]);
   buf_put_le32 (output + 12, b_next[3]);
 }
 
 /* Encrypt the single block BUFFER_IN into BUFFER_OUT using the Serpent
    context CTX.  Returns the number of stack bytes the caller should
    burn: room for the two serpent_block_t temporaries used by the
    internal routine.  */
 static unsigned int
 serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
 {
   serpent_encrypt_internal ((serpent_context_t *) ctx, buffer_in, buffer_out);
 
   /* burn_stack */
   return 2 * sizeof (serpent_block_t);
 }
 
 /* Decrypt the single block BUFFER_IN into BUFFER_OUT using the Serpent
    context CTX.  Returns the number of stack bytes the caller should
    burn: room for the two serpent_block_t temporaries used by the
    internal routine.  */
 static unsigned int
 serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
 {
   serpent_decrypt_internal ((serpent_context_t *) ctx, buffer_in, buffer_out);
 
   /* burn_stack */
   return 2 * sizeof (serpent_block_t);
 }
 
 
 
 /* Bulk encryption of complete blocks in CTR mode.  This function is only
    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
    of size sizeof(serpent_block_t).
 
    NBLOCKS blocks are read from INBUF_ARG and written to OUTBUF_ARG.
    Large chunks are handed to the AVX2 (16 blocks), SSE2 (8 blocks) or
    NEON (8 blocks) implementations where compiled in and enabled; the
    remainder is processed one block at a time by the generic code.  */
 static void
 _gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
 {
   serpent_context_t *serpent = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char keystream[sizeof(serpent_block_t)];
   int burn_depth = 2 * sizeof (serpent_block_t);
 
 #ifdef USE_AVX2
   if (serpent->use_avx2 && nblocks >= 16)
     {
       /* Process data in 16 block chunks. */
       do
         {
           _gcry_serpent_avx2_ctr_enc(serpent, dst, src, ctr);
 
           dst += 16 * sizeof(serpent_block_t);
           src += 16 * sizeof(serpent_block_t);
           nblocks -= 16;
         }
       while (nblocks >= 16);
 
       /* serpent-avx2 assembly code does not use stack, so nothing
          needs burning if it handled all of the data.  */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic/sse2 code to handle smaller chunks... */
       /* TODO: use caching instead? */
     }
 #endif
 
 #ifdef USE_SSE2
   if (nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_sse2_ctr_enc(serpent, dst, src, ctr);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-sse2 assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
       /* TODO: use caching instead? */
     }
 #endif
 
 #ifdef USE_NEON
   if (serpent->use_neon && nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_neon_ctr_enc(serpent, dst, src, ctr);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-neon assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
       /* TODO: use caching instead? */
     }
 #endif
 
   while (nblocks)
     {
       /* Encrypt the counter. */
       serpent_encrypt_internal(serpent, ctr, keystream);
       /* XOR the input with the encrypted counter and store in output.  */
       cipher_block_xor(dst, keystream, src, sizeof(serpent_block_t));
       dst += sizeof(serpent_block_t);
       src += sizeof(serpent_block_t);
       /* Increment the counter.  */
       cipher_block_add(ctr, 1, sizeof(serpent_block_t));
 
       nblocks--;
     }
 
   wipememory(keystream, sizeof(keystream));
   _gcry_burn_stack(burn_depth);
 }
 
 /* Bulk decryption of complete blocks in CBC mode.  This function is only
    intended for the bulk encryption feature of cipher.c.
 
    NBLOCKS blocks are read from INBUF_ARG and written to OUTBUF_ARG,
    with IV updated along the way.  Large chunks are handed to the AVX2
    (16 blocks), SSE2 (8 blocks) or NEON (8 blocks) implementations where
    compiled in and enabled; the remainder is processed one block at a
    time by the generic code.  */
 static void
 _gcry_serpent_cbc_dec(void *context, unsigned char *iv,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
 {
   serpent_context_t *serpent = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char plain[sizeof(serpent_block_t)];
   int burn_depth = 2 * sizeof (serpent_block_t);
 
 #ifdef USE_AVX2
   if (serpent->use_avx2 && nblocks >= 16)
     {
       /* Process data in 16 block chunks. */
       do
         {
           _gcry_serpent_avx2_cbc_dec(serpent, dst, src, iv);
 
           dst += 16 * sizeof(serpent_block_t);
           src += 16 * sizeof(serpent_block_t);
           nblocks -= 16;
         }
       while (nblocks >= 16);
 
       /* serpent-avx2 assembly code does not use stack, so nothing
          needs burning if it handled all of the data.  */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic/sse2 code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_SSE2
   if (nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_sse2_cbc_dec(serpent, dst, src, iv);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-sse2 assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_NEON
   if (serpent->use_neon && nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_neon_cbc_dec(serpent, dst, src, iv);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-neon assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
   while (nblocks)
     {
       /* The input block is needed later and it may be identical to the
          output block, so store the intermediate result to PLAIN.  */
       serpent_decrypt_internal (serpent, src, plain);
 
       cipher_block_xor_n_copy_2(dst, plain, iv, src,
                                 sizeof(serpent_block_t));
       src += sizeof(serpent_block_t);
       dst += sizeof(serpent_block_t);
 
       nblocks--;
     }
 
   wipememory(plain, sizeof(plain));
   _gcry_burn_stack(burn_depth);
 }
 
 /* Bulk decryption of complete blocks in CFB mode.  This function is only
    intended for the bulk encryption feature of cipher.c.
 
    NBLOCKS blocks are read from INBUF_ARG and written to OUTBUF_ARG,
    with IV updated along the way.  Large chunks are handed to the AVX2
    (16 blocks), SSE2 (8 blocks) or NEON (8 blocks) implementations where
    compiled in and enabled; the remainder is processed one block at a
    time by the generic code.  */
 static void
 _gcry_serpent_cfb_dec(void *context, unsigned char *iv,
                       void *outbuf_arg, const void *inbuf_arg,
                       size_t nblocks)
 {
   serpent_context_t *serpent = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   int burn_depth = 2 * sizeof (serpent_block_t);
 
 #ifdef USE_AVX2
   if (serpent->use_avx2 && nblocks >= 16)
     {
       /* Process data in 16 block chunks. */
       do
         {
           _gcry_serpent_avx2_cfb_dec(serpent, dst, src, iv);
 
           dst += 16 * sizeof(serpent_block_t);
           src += 16 * sizeof(serpent_block_t);
           nblocks -= 16;
         }
       while (nblocks >= 16);
 
       /* serpent-avx2 assembly code does not use stack, so nothing
          needs burning if it handled all of the data.  */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic/sse2 code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_SSE2
   if (nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_sse2_cfb_dec(serpent, dst, src, iv);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-sse2 assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_NEON
   if (serpent->use_neon && nblocks >= 8)
     {
       /* Process data in 8 block chunks. */
       do
         {
           _gcry_serpent_neon_cfb_dec(serpent, dst, src, iv);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-neon assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
   while (nblocks)
     {
       /* Encrypt the IV in place, then combine it with the input block.  */
       serpent_encrypt_internal(serpent, iv, iv);
       cipher_block_xor_n_copy(dst, iv, src, sizeof(serpent_block_t));
       dst += sizeof(serpent_block_t);
       src += sizeof(serpent_block_t);
 
       nblocks--;
     }
 
   _gcry_burn_stack(burn_depth);
 }
 
 /* Bulk encryption/decryption of complete blocks in OCB mode.
 
    Only chunks of 16 blocks (AVX2) or 8 blocks (SSE2, NEON) are handled,
    and only where the respective implementation is compiled in and
    enabled.  A non-zero ENCRYPT selects encryption, zero decryption.
    The OCB data block counter of C is advanced by the number of blocks
    processed.  Returns the number of blocks that were NOT processed.  */
 static size_t
 _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                          const void *inbuf_arg, size_t nblocks, int encrypt)
 {
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   serpent_context_t *ctx = (void *)&c->context.c;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   int burn_depth = 2 * sizeof (serpent_block_t);
   u64 blkn = c->u_mode.ocb.data_nblocks;
 #else
   (void)c;
   (void)outbuf_arg;
   (void)inbuf_arg;
   (void)encrypt;
 #endif
 
 #ifdef USE_AVX2
   if (ctx->use_avx2 && nblocks >= 16)
     {
       u64 Ls[16];
       u64 *l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
       /* Process data in 16 block chunks. */
       do
         {
           blkn += 16;
           *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
           if (encrypt)
             _gcry_serpent_avx2_ocb_enc(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, Ls);
           else
             _gcry_serpent_avx2_ocb_dec(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, Ls);
 
           dst += 16 * sizeof(serpent_block_t);
           src += 16 * sizeof(serpent_block_t);
           nblocks -= 16;
         }
       while (nblocks >= 16);
 
       /* serpent-avx2 assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_SSE2
   if (nblocks >= 8)
     {
       u64 Ls[8];
       u64 *l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
       /* Process data in 8 block chunks. */
       do
         {
           blkn += 8;
           *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
           if (encrypt)
             _gcry_serpent_sse2_ocb_enc(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, Ls);
           else
             _gcry_serpent_sse2_ocb_dec(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, Ls);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-sse2 assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_NEON
   if (ctx->use_neon && nblocks >= 8)
     {
       uintptr_t Ls[8];
       uintptr_t *l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
       /* Process data in 8 block chunks. */
       do
         {
           blkn += 8;
           *l = (uintptr_t)(void *)ocb_get_l(c,  blkn - blkn % 8);
 
           if (encrypt)
             _gcry_serpent_neon_ocb_enc(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, (const void **)Ls);
           else
             _gcry_serpent_neon_ocb_dec(ctx, dst, src, c->u_iv.iv,
                                        c->u_ctr.ctr, (const void **)Ls);
 
           dst += 8 * sizeof(serpent_block_t);
           src += 8 * sizeof(serpent_block_t);
           nblocks -= 8;
         }
       while (nblocks >= 8);
 
       /* serpent-neon assembly code does not use stack */
       if (nblocks == 0)
         burn_depth = 0;
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   /* Record how many data blocks have been consumed so far.  */
   c->u_mode.ocb.data_nblocks = blkn;
 
   if (burn_depth)
     _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 #endif
 
   return nblocks;
 }
 
 /* Bulk authentication of complete blocks in OCB mode.
  *
  * ABUF_ARG points at NBLOCKS blocks of sizeof(serpent_block_t) bytes each.
  * Whichever of the AVX2 (16 blocks per call), SSE2 (8 blocks per call) and
  * NEON (8 blocks per call) sections are compiled in consume as many whole
  * chunks as they can; the AVX2 and NEON sections additionally require the
  * matching ctx->use_* flag.  The running block counter is read from and
  * written back to c->u_mode.ocb.aad_nblocks.
  *
  * Returns the number of blocks that were NOT consumed here.  Judging by
  * the "generic code" comments below, the caller is expected to process
  * that remainder itself.  When none of the USE_* macros is defined the
  * function does nothing and returns NBLOCKS unchanged.  */
 static size_t
 _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 			size_t nblocks)
 {
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   serpent_context_t *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   /* Default amount of stack to burn on exit; cleared below only when a
      SIMD section consumed every block.  */
   int burn_stack_depth = 2 * sizeof(serpent_block_t);
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 #else
   /* No SIMD implementation compiled in: silence unused-parameter
      warnings.  */
   (void)c;
   (void)abuf_arg;
 #endif
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
       u64 *l;
 
       if (nblocks >= 16)
 	{
         /* NOTE(review): presumably fills Ls[] and returns the one slot
            that has to be refreshed for every chunk -- confirm against
            the helper.  */
         l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
 	      blkn += 16;
 	      /* blkn - blkn % 16 is blkn rounded down to a multiple of 16. */
 	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
 
 	      nblocks -= 16;
 	      abuf += 16 * sizeof(serpent_block_t);
 	      did_use_avx2 = 1;
 	    }
 	}
 
       if (did_use_avx2)
 	{
 	  /* serpent-avx2 assembly code does not use stack */
 	  if (nblocks == 0)
 	    burn_stack_depth = 0;
 	}
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #ifdef USE_SSE2
   /* Unlike the AVX2 and NEON sections, this one is not gated on a
      run-time flag in ctx.  */
   {
     int did_use_sse2 = 0;
     u64 Ls[8];
     u64 *l;
 
     if (nblocks >= 8)
       {
         l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
 	  {
 	    blkn += 8;
 	    /* blkn - blkn % 8 is blkn rounded down to a multiple of 8. */
 	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					c->u_mode.ocb.aad_sum, Ls);
 
 	    nblocks -= 8;
 	    abuf += 8 * sizeof(serpent_block_t);
 	    did_use_sse2 = 1;
 	  }
       }
 
     if (did_use_sse2)
       {
 	/* serpent-sse2 assembly code does not use stack */
 	if (nblocks == 0)
 	  burn_stack_depth = 0;
       }
 
     /* Use generic code to handle smaller chunks... */
   }
 #endif
 
 #ifdef USE_NEON
   if (ctx->use_neon)
     {
       int did_use_neon = 0;
       uintptr_t Ls[8];
       uintptr_t *l;
 
       if (nblocks >= 8)
 	{
           l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
 	      blkn += 8;
 	      /* blkn - blkn % 8 is blkn rounded down to a multiple of 8. */
 	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum,
 					  (const void **)Ls);
 
 	      nblocks -= 8;
 	      abuf += 8 * sizeof(serpent_block_t);
 	      did_use_neon = 1;
 	    }
 	}
 
       if (did_use_neon)
 	{
 	  /* serpent-neon assembly code does not use stack */
 	  if (nblocks == 0)
 	    burn_stack_depth = 0;
 	}
 
       /* Use generic code to handle smaller chunks... */
     }
 #endif
 
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   /* Publish the advanced block counter for subsequent calls.  */
   c->u_mode.ocb.aad_nblocks = blkn;
 
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
 
   return nblocks;
 }
 
 
 /* Run Serpent over NUM_BLKS consecutive blocks, reading from IN and
  * writing to OUT.  A non-zero ENCRYPT selects encryption, zero selects
  * decryption.  The AVX2, SSE2 and NEON routines are tried first where
  * they are compiled in; whatever is left is processed one block at a
  * time by the generic C implementation.
  *
  * The return value is the stack depth to burn: it is non-zero only when
  * the generic per-block code has run at least once.  */
 static unsigned int
 serpent_crypt_blk1_16(void *context, byte *out, const byte *in,
 		      size_t num_blks, int encrypt)
 {
   serpent_context_t *ctx = context;
   unsigned int generic_burn;
   unsigned int depth = 0;
 
 #ifdef USE_AVX2
   /* A batch of exactly 16 blocks goes to the AVX2 routine in one call.  */
   if (num_blks == 16 && ctx->use_avx2)
     {
       _gcry_serpent_avx2_blk16 (ctx, out, in, encrypt);
       return 0;
     }
 #endif
 
 #ifdef USE_SSE2
   for (; num_blks >= 8; num_blks -= 8)
     {
       _gcry_serpent_sse2_blk8 (ctx, out, in, encrypt);
       out += 8 * sizeof(serpent_block_t);
       in += 8 * sizeof(serpent_block_t);
     }
 #endif
 
 #ifdef USE_NEON
   if (ctx->use_neon)
     {
       for (; num_blks >= 8; num_blks -= 8)
         {
           _gcry_serpent_neon_blk8 (ctx, out, in, encrypt);
           out += 8 * sizeof(serpent_block_t);
           in += 8 * sizeof(serpent_block_t);
         }
     }
 #endif
 
   /* Generic fallback, one block per iteration.  */
   for (; num_blks >= 1; num_blks--)
     {
       if (!encrypt)
         serpent_decrypt_internal((void *)ctx, in, out);
       else
         serpent_encrypt_internal((void *)ctx, in, out);
 
       generic_burn = 2 * sizeof(serpent_block_t);
       if (generic_burn > depth)
         depth = generic_burn;
 
       out += sizeof(serpent_block_t);
       in += sizeof(serpent_block_t);
     }
 
   return depth;
 }
 
 /* Encrypting variant of serpent_crypt_blk1_16 with the four-argument
  * signature that the bulk helpers in this file take as a callback.  */
 static unsigned int
 serpent_encrypt_blk1_16(void *ctx, byte *out, const byte *in,
 			size_t num_blks)
 {
   const int do_encrypt = 1;
 
   return serpent_crypt_blk1_16 (ctx, out, in, num_blks, do_encrypt);
 }
 
 /* Decrypting variant of serpent_crypt_blk1_16 with the four-argument
  * signature that the bulk helpers in this file take as a callback.  */
 static unsigned int
 serpent_decrypt_blk1_16(void *ctx, byte *out, const byte *in,
 			size_t num_blks)
 {
   const int do_encrypt = 0;
 
   return serpent_crypt_blk1_16 (ctx, out, in, num_blks, do_encrypt);
 }
 
 
 /* Bulk encryption/decryption of complete blocks in XTS mode.
  *
  * All the work is delegated to bulk_xts_crypt_128, which receives the
  * Serpent block callback matching ENCRYPT, the TWEAK buffer and a
  * 16-block scratch buffer.  The scratch buffer is wiped afterwards
  * (TMP_USED bytes of it) and the stack is burned by the depth the
  * helper reports.  Nothing happens when NBLOCKS is zero.  */
 static void
 _gcry_serpent_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
 			 const void *inbuf_arg, size_t nblocks, int encrypt)
 {
   serpent_context_t *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   unsigned char tmpbuf[16 * 16];
   unsigned int tmp_used = 16;
   int burn_stack_depth = 0;
   size_t nburn;
 
   if (!nblocks)
     return;
 
   if (encrypt)
     nburn = bulk_xts_crypt_128(ctx, serpent_encrypt_blk1_16,
                                outbuf, inbuf, nblocks,
                                tweak, tmpbuf, sizeof(tmpbuf) / 16,
                                &tmp_used);
   else
     nburn = bulk_xts_crypt_128(ctx, serpent_decrypt_blk1_16,
                                outbuf, inbuf, nblocks,
                                tweak, tmpbuf, sizeof(tmpbuf) / 16,
                                &tmp_used);
 
   if (nburn > 0)
     burn_stack_depth = nburn;
 
   /* Do not leave intermediate data behind in the scratch buffer.  */
   wipememory(tmpbuf, tmp_used);
 
   if (burn_stack_depth)
     _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Bulk encryption/decryption in ECB mode.
  *
  * Hands NBLOCKS blocks to bulk_ecb_crypt_128 together with the Serpent
  * block callback matching ENCRYPT and a batch size of 16, then burns
  * the stack by the depth the helper reports.  Nothing happens when
  * NBLOCKS is zero.  */
 static void
 _gcry_serpent_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
 			 size_t nblocks, int encrypt)
 {
   serpent_context_t *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   int burn_stack_depth = 0;
   size_t nburn;
 
   if (!nblocks)
     return;
 
   if (encrypt)
     nburn = bulk_ecb_crypt_128(ctx, serpent_encrypt_blk1_16,
                                outbuf, inbuf, nblocks, 16);
   else
     nburn = bulk_ecb_crypt_128(ctx, serpent_decrypt_blk1_16,
                                outbuf, inbuf, nblocks, 16);
 
   if (nburn > 0)
     burn_stack_depth = nburn;
 
   if (burn_stack_depth)
     _gcry_burn_stack(burn_stack_depth);
 }
 
 
 
 /* Serpent self-test.
  *
  * Each vector in the table below is run through key setup, one block
  * encryption and one block decryption.  Returns NULL when every vector
  * round-trips correctly, otherwise a static string naming the key size
  * and direction that failed.  */
 
 static const char *
 serpent_test (void)
 {
   serpent_context_t context;
   unsigned char scratch[16];
   struct test *tv;
 
   static struct test
   {
     int key_length;
     unsigned char key[32];
     unsigned char text_plain[16];
     unsigned char text_cipher[16];
   } test_data[] =
     {
       {
 	16,
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
 	"\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
 	"\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
       },
       {
 	24,
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 	"\x00\x00\x00\x00\x00\x00\x00\x00",
 	"\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
 	"\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
       },
       {
 	32,
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
 	"\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
 	"\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
       },
       {
 	32,
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
 	"\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
 	"\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
       },
       {
 	0
       },
     };
 
   /* The table ends at the entry whose key_length is zero.  */
   for (tv = test_data; tv->key_length; tv++)
     {
       serpent_setkey_internal (&context, tv->key, tv->key_length);
 
       /* Encrypt direction.  */
       serpent_encrypt_internal (&context, tv->text_plain, scratch);
       if (memcmp (scratch, tv->text_cipher, sizeof (serpent_block_t)) != 0)
         {
           if (tv->key_length == 16)
             return "Serpent-128 test encryption failed.";
           else if (tv->key_length == 24)
             return "Serpent-192 test encryption failed.";
           else if (tv->key_length == 32)
             return "Serpent-256 test encryption failed.";
         }
 
       /* Decrypt direction.  */
       serpent_decrypt_internal (&context, tv->text_cipher, scratch);
       if (memcmp (scratch, tv->text_plain, sizeof (serpent_block_t)) != 0)
         {
           if (tv->key_length == 16)
             return "Serpent-128 test decryption failed.";
           else if (tv->key_length == 24)
             return "Serpent-192 test decryption failed.";
           else if (tv->key_length == 32)
             return "Serpent-256 test decryption failed.";
         }
     }
 
   return NULL;
 }
 
 
 /* Object identifier tables, one per key size.  Every entry pairs an OID
    string with the cipher mode it denotes (ECB, CBC, OFB, CFB); each
    table is terminated by an entry whose first member is NULL.  */
 
 /* OIDs referenced by the Serpent-128 cipher spec.  */
 static const gcry_cipher_oid_spec_t serpent128_oids[] =
   {
     {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
     {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
     {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
     {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
     { NULL }
   };
 
 /* OIDs referenced by the Serpent-192 cipher spec.  */
 static const gcry_cipher_oid_spec_t serpent192_oids[] =
   {
     {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
     {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
     {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
     {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
     { NULL }
   };
 
 /* OIDs referenced by the Serpent-256 cipher spec.  */
 static const gcry_cipher_oid_spec_t serpent256_oids[] =
   {
     {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
     {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
     {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
     {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
     { NULL }
   };
 
 /* NULL-terminated lists of alternative algorithm names, referenced by
    the cipher specs below.  Only the 128-bit variant also carries the
    bare name "SERPENT".  */
 static const char *serpent128_aliases[] =
   {
     "SERPENT",
     "SERPENT-128",
     NULL
   };
 /* Alternative names for the 192-bit variant.  */
 static const char *serpent192_aliases[] =
   {
     "SERPENT-192",
     NULL
   };
 /* Alternative names for the 256-bit variant.  */
 static const char *serpent256_aliases[] =
   {
     "SERPENT-256",
     NULL
   };
 
 /* Cipher specifications for the three Serpent key sizes.  The
    initialisers are positional: algorithm id, a {0, 0} member, primary
    name, alias list, OID list, two integers, the context size and the
    setkey/encrypt/decrypt handlers.
    NOTE(review): the two integers are presumably the block length in
    bytes (16) and the key length in bits -- confirm against the
    declaration of gcry_cipher_spec_t.  All three specs share the same
    handlers and context type and differ only in id, names, OIDs and the
    second integer.  */
 
 /* Serpent with a 128-bit key.  */
 gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
   {
     GCRY_CIPHER_SERPENT128, {0, 0},
     "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
 
 /* Serpent with a 192-bit key.  */
 gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
   {
     GCRY_CIPHER_SERPENT192, {0, 0},
     "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
 
 /* Serpent with a 256-bit key.  */
 gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
   {
     GCRY_CIPHER_SERPENT256, {0, 0},
     "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
diff --git a/cipher/tiger.c b/cipher/tiger.c
index ae55359c..4e6b1239 100644
--- a/cipher/tiger.c
+++ b/cipher/tiger.c
@@ -1,860 +1,860 @@
 /* tiger.c  -  The TIGER hash function
  * Copyright (C) 1998, 2001, 2002, 2003, 2010 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 /* See http://www.cs.technion.ac.il/~biham/Reports/Tiger/  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 #include "hash-common.h"
 #include "bithelp.h"
 #include "bufhelp.h"
 
 /* Per-hash state for the TIGER implementation.  */
 typedef struct
 {
   gcry_md_block_ctx_t bctx;  /* Common block-buffering context.  */
   u64  a, b, c;  /* NOTE(review): presumably the three 64-bit chaining
                     words of the hash state -- confirm where they are
                     initialised.  */
   int  variant;  /* 0 = old code, 1 = fixed code, 2 = TIGER2.  */
 } TIGER_CONTEXT;
 
 
 /*********************************
  * Okay, okay, this is not the fastest code - improvements are welcome.
  *
  */
 
 /* Some test vectors:
  * ""                   24F0130C63AC9332 16166E76B1BB925F F373DE2D49584E7A
  * "abc"                F258C1E88414AB2A 527AB541FFC5B8BF 935F7B951C132951
  * "Tiger"              9F00F599072300DD 276ABB38C8EB6DEC 37790C116F9D2BDF
  * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"
  *			87FB2A9083851CF7 470D2CF810E6DF9E B586445034A5A386
  * "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789"
  *			467DB80863EBCE48 8DF1CD1261655DE9 57896565975F9197
  * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham"
  *			0C410A042968868A 1671DA5A3FD29A72 5EC1E457D3CDB303
  * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc"
  * "eedings of Fast Software Encryption 3, Cambridge."
  *			EBF591D5AFA655CE 7F22894FF87F54AC 89C811B6B0DA3193
  * "Tiger - A Fast New Hash Function, by Ross Anderson and Eli Biham, proc"
  * "eedings of Fast Software Encryption 3, Cambridge, 1996."
  *			3D9AEB03D1BD1A63 57B2774DFD6D5B24 DD68151D503974FC
  * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-ABCDEF"
  * "GHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"
  *			00B83EB4E53440C5 76AC6AAEE0A74858 25FD15E70A59FFE4
  */
 
 /* First S-box lookup table: 256 constants of 64 bits each.  The trailing
    comment on every entry is its running index (0..255 here; the
    numbering continues in the following tables).  The values are fixed
    constants and must not be edited.  */
 static u64 sbox1[256] = {
   U64_C(0x02aab17cf7e90c5e) /*    0 */, U64_C(0xac424b03e243a8ec) /*    1 */,
   U64_C(0x72cd5be30dd5fcd3) /*    2 */, U64_C(0x6d019b93f6f97f3a) /*    3 */,
   U64_C(0xcd9978ffd21f9193) /*    4 */, U64_C(0x7573a1c9708029e2) /*    5 */,
   U64_C(0xb164326b922a83c3) /*    6 */, U64_C(0x46883eee04915870) /*    7 */,
   U64_C(0xeaace3057103ece6) /*    8 */, U64_C(0xc54169b808a3535c) /*    9 */,
   U64_C(0x4ce754918ddec47c) /*   10 */, U64_C(0x0aa2f4dfdc0df40c) /*   11 */,
   U64_C(0x10b76f18a74dbefa) /*   12 */, U64_C(0xc6ccb6235ad1ab6a) /*   13 */,
   U64_C(0x13726121572fe2ff) /*   14 */, U64_C(0x1a488c6f199d921e) /*   15 */,
   U64_C(0x4bc9f9f4da0007ca) /*   16 */, U64_C(0x26f5e6f6e85241c7) /*   17 */,
   U64_C(0x859079dbea5947b6) /*   18 */, U64_C(0x4f1885c5c99e8c92) /*   19 */,
   U64_C(0xd78e761ea96f864b) /*   20 */, U64_C(0x8e36428c52b5c17d) /*   21 */,
   U64_C(0x69cf6827373063c1) /*   22 */, U64_C(0xb607c93d9bb4c56e) /*   23 */,
   U64_C(0x7d820e760e76b5ea) /*   24 */, U64_C(0x645c9cc6f07fdc42) /*   25 */,
   U64_C(0xbf38a078243342e0) /*   26 */, U64_C(0x5f6b343c9d2e7d04) /*   27 */,
   U64_C(0xf2c28aeb600b0ec6) /*   28 */, U64_C(0x6c0ed85f7254bcac) /*   29 */,
   U64_C(0x71592281a4db4fe5) /*   30 */, U64_C(0x1967fa69ce0fed9f) /*   31 */,
   U64_C(0xfd5293f8b96545db) /*   32 */, U64_C(0xc879e9d7f2a7600b) /*   33 */,
   U64_C(0x860248920193194e) /*   34 */, U64_C(0xa4f9533b2d9cc0b3) /*   35 */,
   U64_C(0x9053836c15957613) /*   36 */, U64_C(0xdb6dcf8afc357bf1) /*   37 */,
   U64_C(0x18beea7a7a370f57) /*   38 */, U64_C(0x037117ca50b99066) /*   39 */,
   U64_C(0x6ab30a9774424a35) /*   40 */, U64_C(0xf4e92f02e325249b) /*   41 */,
   U64_C(0x7739db07061ccae1) /*   42 */, U64_C(0xd8f3b49ceca42a05) /*   43 */,
   U64_C(0xbd56be3f51382f73) /*   44 */, U64_C(0x45faed5843b0bb28) /*   45 */,
   U64_C(0x1c813d5c11bf1f83) /*   46 */, U64_C(0x8af0e4b6d75fa169) /*   47 */,
   U64_C(0x33ee18a487ad9999) /*   48 */, U64_C(0x3c26e8eab1c94410) /*   49 */,
   U64_C(0xb510102bc0a822f9) /*   50 */, U64_C(0x141eef310ce6123b) /*   51 */,
   U64_C(0xfc65b90059ddb154) /*   52 */, U64_C(0xe0158640c5e0e607) /*   53 */,
   U64_C(0x884e079826c3a3cf) /*   54 */, U64_C(0x930d0d9523c535fd) /*   55 */,
   U64_C(0x35638d754e9a2b00) /*   56 */, U64_C(0x4085fccf40469dd5) /*   57 */,
   U64_C(0xc4b17ad28be23a4c) /*   58 */, U64_C(0xcab2f0fc6a3e6a2e) /*   59 */,
   U64_C(0x2860971a6b943fcd) /*   60 */, U64_C(0x3dde6ee212e30446) /*   61 */,
   U64_C(0x6222f32ae01765ae) /*   62 */, U64_C(0x5d550bb5478308fe) /*   63 */,
   U64_C(0xa9efa98da0eda22a) /*   64 */, U64_C(0xc351a71686c40da7) /*   65 */,
   U64_C(0x1105586d9c867c84) /*   66 */, U64_C(0xdcffee85fda22853) /*   67 */,
   U64_C(0xccfbd0262c5eef76) /*   68 */, U64_C(0xbaf294cb8990d201) /*   69 */,
   U64_C(0xe69464f52afad975) /*   70 */, U64_C(0x94b013afdf133e14) /*   71 */,
   U64_C(0x06a7d1a32823c958) /*   72 */, U64_C(0x6f95fe5130f61119) /*   73 */,
   U64_C(0xd92ab34e462c06c0) /*   74 */, U64_C(0xed7bde33887c71d2) /*   75 */,
   U64_C(0x79746d6e6518393e) /*   76 */, U64_C(0x5ba419385d713329) /*   77 */,
   U64_C(0x7c1ba6b948a97564) /*   78 */, U64_C(0x31987c197bfdac67) /*   79 */,
   U64_C(0xde6c23c44b053d02) /*   80 */, U64_C(0x581c49fed002d64d) /*   81 */,
   U64_C(0xdd474d6338261571) /*   82 */, U64_C(0xaa4546c3e473d062) /*   83 */,
   U64_C(0x928fce349455f860) /*   84 */, U64_C(0x48161bbacaab94d9) /*   85 */,
   U64_C(0x63912430770e6f68) /*   86 */, U64_C(0x6ec8a5e602c6641c) /*   87 */,
   U64_C(0x87282515337ddd2b) /*   88 */, U64_C(0x2cda6b42034b701b) /*   89 */,
   U64_C(0xb03d37c181cb096d) /*   90 */, U64_C(0xe108438266c71c6f) /*   91 */,
   U64_C(0x2b3180c7eb51b255) /*   92 */, U64_C(0xdf92b82f96c08bbc) /*   93 */,
   U64_C(0x5c68c8c0a632f3ba) /*   94 */, U64_C(0x5504cc861c3d0556) /*   95 */,
   U64_C(0xabbfa4e55fb26b8f) /*   96 */, U64_C(0x41848b0ab3baceb4) /*   97 */,
   U64_C(0xb334a273aa445d32) /*   98 */, U64_C(0xbca696f0a85ad881) /*   99 */,
   U64_C(0x24f6ec65b528d56c) /*  100 */, U64_C(0x0ce1512e90f4524a) /*  101 */,
   U64_C(0x4e9dd79d5506d35a) /*  102 */, U64_C(0x258905fac6ce9779) /*  103 */,
   U64_C(0x2019295b3e109b33) /*  104 */, U64_C(0xf8a9478b73a054cc) /*  105 */,
   U64_C(0x2924f2f934417eb0) /*  106 */, U64_C(0x3993357d536d1bc4) /*  107 */,
   U64_C(0x38a81ac21db6ff8b) /*  108 */, U64_C(0x47c4fbf17d6016bf) /*  109 */,
   U64_C(0x1e0faadd7667e3f5) /*  110 */, U64_C(0x7abcff62938beb96) /*  111 */,
   U64_C(0xa78dad948fc179c9) /*  112 */, U64_C(0x8f1f98b72911e50d) /*  113 */,
   U64_C(0x61e48eae27121a91) /*  114 */, U64_C(0x4d62f7ad31859808) /*  115 */,
   U64_C(0xeceba345ef5ceaeb) /*  116 */, U64_C(0xf5ceb25ebc9684ce) /*  117 */,
   U64_C(0xf633e20cb7f76221) /*  118 */, U64_C(0xa32cdf06ab8293e4) /*  119 */,
   U64_C(0x985a202ca5ee2ca4) /*  120 */, U64_C(0xcf0b8447cc8a8fb1) /*  121 */,
   U64_C(0x9f765244979859a3) /*  122 */, U64_C(0xa8d516b1a1240017) /*  123 */,
   U64_C(0x0bd7ba3ebb5dc726) /*  124 */, U64_C(0xe54bca55b86adb39) /*  125 */,
   U64_C(0x1d7a3afd6c478063) /*  126 */, U64_C(0x519ec608e7669edd) /*  127 */,
   U64_C(0x0e5715a2d149aa23) /*  128 */, U64_C(0x177d4571848ff194) /*  129 */,
   U64_C(0xeeb55f3241014c22) /*  130 */, U64_C(0x0f5e5ca13a6e2ec2) /*  131 */,
   U64_C(0x8029927b75f5c361) /*  132 */, U64_C(0xad139fabc3d6e436) /*  133 */,
   U64_C(0x0d5df1a94ccf402f) /*  134 */, U64_C(0x3e8bd948bea5dfc8) /*  135 */,
   U64_C(0xa5a0d357bd3ff77e) /*  136 */, U64_C(0xa2d12e251f74f645) /*  137 */,
   U64_C(0x66fd9e525e81a082) /*  138 */, U64_C(0x2e0c90ce7f687a49) /*  139 */,
   U64_C(0xc2e8bcbeba973bc5) /*  140 */, U64_C(0x000001bce509745f) /*  141 */,
   U64_C(0x423777bbe6dab3d6) /*  142 */, U64_C(0xd1661c7eaef06eb5) /*  143 */,
   U64_C(0xa1781f354daacfd8) /*  144 */, U64_C(0x2d11284a2b16affc) /*  145 */,
   U64_C(0xf1fc4f67fa891d1f) /*  146 */, U64_C(0x73ecc25dcb920ada) /*  147 */,
   U64_C(0xae610c22c2a12651) /*  148 */, U64_C(0x96e0a810d356b78a) /*  149 */,
   U64_C(0x5a9a381f2fe7870f) /*  150 */, U64_C(0xd5ad62ede94e5530) /*  151 */,
   U64_C(0xd225e5e8368d1427) /*  152 */, U64_C(0x65977b70c7af4631) /*  153 */,
   U64_C(0x99f889b2de39d74f) /*  154 */, U64_C(0x233f30bf54e1d143) /*  155 */,
   U64_C(0x9a9675d3d9a63c97) /*  156 */, U64_C(0x5470554ff334f9a8) /*  157 */,
   U64_C(0x166acb744a4f5688) /*  158 */, U64_C(0x70c74caab2e4aead) /*  159 */,
   U64_C(0xf0d091646f294d12) /*  160 */, U64_C(0x57b82a89684031d1) /*  161 */,
   U64_C(0xefd95a5a61be0b6b) /*  162 */, U64_C(0x2fbd12e969f2f29a) /*  163 */,
   U64_C(0x9bd37013feff9fe8) /*  164 */, U64_C(0x3f9b0404d6085a06) /*  165 */,
   U64_C(0x4940c1f3166cfe15) /*  166 */, U64_C(0x09542c4dcdf3defb) /*  167 */,
   U64_C(0xb4c5218385cd5ce3) /*  168 */, U64_C(0xc935b7dc4462a641) /*  169 */,
   U64_C(0x3417f8a68ed3b63f) /*  170 */, U64_C(0xb80959295b215b40) /*  171 */,
   U64_C(0xf99cdaef3b8c8572) /*  172 */, U64_C(0x018c0614f8fcb95d) /*  173 */,
   U64_C(0x1b14accd1a3acdf3) /*  174 */, U64_C(0x84d471f200bb732d) /*  175 */,
   U64_C(0xc1a3110e95e8da16) /*  176 */, U64_C(0x430a7220bf1a82b8) /*  177 */,
   U64_C(0xb77e090d39df210e) /*  178 */, U64_C(0x5ef4bd9f3cd05e9d) /*  179 */,
   U64_C(0x9d4ff6da7e57a444) /*  180 */, U64_C(0xda1d60e183d4a5f8) /*  181 */,
   U64_C(0xb287c38417998e47) /*  182 */, U64_C(0xfe3edc121bb31886) /*  183 */,
   U64_C(0xc7fe3ccc980ccbef) /*  184 */, U64_C(0xe46fb590189bfd03) /*  185 */,
   U64_C(0x3732fd469a4c57dc) /*  186 */, U64_C(0x7ef700a07cf1ad65) /*  187 */,
   U64_C(0x59c64468a31d8859) /*  188 */, U64_C(0x762fb0b4d45b61f6) /*  189 */,
   U64_C(0x155baed099047718) /*  190 */, U64_C(0x68755e4c3d50baa6) /*  191 */,
   U64_C(0xe9214e7f22d8b4df) /*  192 */, U64_C(0x2addbf532eac95f4) /*  193 */,
   U64_C(0x32ae3909b4bd0109) /*  194 */, U64_C(0x834df537b08e3450) /*  195 */,
   U64_C(0xfa209da84220728d) /*  196 */, U64_C(0x9e691d9b9efe23f7) /*  197 */,
   U64_C(0x0446d288c4ae8d7f) /*  198 */, U64_C(0x7b4cc524e169785b) /*  199 */,
   U64_C(0x21d87f0135ca1385) /*  200 */, U64_C(0xcebb400f137b8aa5) /*  201 */,
   U64_C(0x272e2b66580796be) /*  202 */, U64_C(0x3612264125c2b0de) /*  203 */,
   U64_C(0x057702bdad1efbb2) /*  204 */, U64_C(0xd4babb8eacf84be9) /*  205 */,
   U64_C(0x91583139641bc67b) /*  206 */, U64_C(0x8bdc2de08036e024) /*  207 */,
   U64_C(0x603c8156f49f68ed) /*  208 */, U64_C(0xf7d236f7dbef5111) /*  209 */,
   U64_C(0x9727c4598ad21e80) /*  210 */, U64_C(0xa08a0896670a5fd7) /*  211 */,
   U64_C(0xcb4a8f4309eba9cb) /*  212 */, U64_C(0x81af564b0f7036a1) /*  213 */,
   U64_C(0xc0b99aa778199abd) /*  214 */, U64_C(0x959f1ec83fc8e952) /*  215 */,
   U64_C(0x8c505077794a81b9) /*  216 */, U64_C(0x3acaaf8f056338f0) /*  217 */,
   U64_C(0x07b43f50627a6778) /*  218 */, U64_C(0x4a44ab49f5eccc77) /*  219 */,
   U64_C(0x3bc3d6e4b679ee98) /*  220 */, U64_C(0x9cc0d4d1cf14108c) /*  221 */,
   U64_C(0x4406c00b206bc8a0) /*  222 */, U64_C(0x82a18854c8d72d89) /*  223 */,
   U64_C(0x67e366b35c3c432c) /*  224 */, U64_C(0xb923dd61102b37f2) /*  225 */,
   U64_C(0x56ab2779d884271d) /*  226 */, U64_C(0xbe83e1b0ff1525af) /*  227 */,
   U64_C(0xfb7c65d4217e49a9) /*  228 */, U64_C(0x6bdbe0e76d48e7d4) /*  229 */,
   U64_C(0x08df828745d9179e) /*  230 */, U64_C(0x22ea6a9add53bd34) /*  231 */,
   U64_C(0xe36e141c5622200a) /*  232 */, U64_C(0x7f805d1b8cb750ee) /*  233 */,
   U64_C(0xafe5c7a59f58e837) /*  234 */, U64_C(0xe27f996a4fb1c23c) /*  235 */,
   U64_C(0xd3867dfb0775f0d0) /*  236 */, U64_C(0xd0e673de6e88891a) /*  237 */,
   U64_C(0x123aeb9eafb86c25) /*  238 */, U64_C(0x30f1d5d5c145b895) /*  239 */,
   U64_C(0xbb434a2dee7269e7) /*  240 */, U64_C(0x78cb67ecf931fa38) /*  241 */,
   U64_C(0xf33b0372323bbf9c) /*  242 */, U64_C(0x52d66336fb279c74) /*  243 */,
   U64_C(0x505f33ac0afb4eaa) /*  244 */, U64_C(0xe8a5cd99a2cce187) /*  245 */,
   U64_C(0x534974801e2d30bb) /*  246 */, U64_C(0x8d2d5711d5876d90) /*  247 */,
   U64_C(0x1f1a412891bc038e) /*  248 */, U64_C(0xd6e2e71d82e56648) /*  249 */,
   U64_C(0x74036c3a497732b7) /*  250 */, U64_C(0x89b67ed96361f5ab) /*  251 */,
   U64_C(0xffed95d8f1ea02a2) /*  252 */, U64_C(0xe72b3bd61464d43d) /*  253 */,
   U64_C(0xa6300f170bdc4820) /*  254 */, U64_C(0xebc18760ed78a77a) /*  255 */
 };
 /* Second S-box lookup table: 256 constants of 64 bits each.  The
    trailing comment on every entry is its running index, continuing from
    the previous table (256..511).  The values are fixed constants and
    must not be edited.  */
 static u64 sbox2[256] = {
   U64_C(0xe6a6be5a05a12138) /*  256 */, U64_C(0xb5a122a5b4f87c98) /*  257 */,
   U64_C(0x563c6089140b6990) /*  258 */, U64_C(0x4c46cb2e391f5dd5) /*  259 */,
   U64_C(0xd932addbc9b79434) /*  260 */, U64_C(0x08ea70e42015aff5) /*  261 */,
   U64_C(0xd765a6673e478cf1) /*  262 */, U64_C(0xc4fb757eab278d99) /*  263 */,
   U64_C(0xdf11c6862d6e0692) /*  264 */, U64_C(0xddeb84f10d7f3b16) /*  265 */,
   U64_C(0x6f2ef604a665ea04) /*  266 */, U64_C(0x4a8e0f0ff0e0dfb3) /*  267 */,
   U64_C(0xa5edeef83dbcba51) /*  268 */, U64_C(0xfc4f0a2a0ea4371e) /*  269 */,
   U64_C(0xe83e1da85cb38429) /*  270 */, U64_C(0xdc8ff882ba1b1ce2) /*  271 */,
   U64_C(0xcd45505e8353e80d) /*  272 */, U64_C(0x18d19a00d4db0717) /*  273 */,
   U64_C(0x34a0cfeda5f38101) /*  274 */, U64_C(0x0be77e518887caf2) /*  275 */,
   U64_C(0x1e341438b3c45136) /*  276 */, U64_C(0xe05797f49089ccf9) /*  277 */,
   U64_C(0xffd23f9df2591d14) /*  278 */, U64_C(0x543dda228595c5cd) /*  279 */,
   U64_C(0x661f81fd99052a33) /*  280 */, U64_C(0x8736e641db0f7b76) /*  281 */,
   U64_C(0x15227725418e5307) /*  282 */, U64_C(0xe25f7f46162eb2fa) /*  283 */,
   U64_C(0x48a8b2126c13d9fe) /*  284 */, U64_C(0xafdc541792e76eea) /*  285 */,
   U64_C(0x03d912bfc6d1898f) /*  286 */, U64_C(0x31b1aafa1b83f51b) /*  287 */,
   U64_C(0xf1ac2796e42ab7d9) /*  288 */, U64_C(0x40a3a7d7fcd2ebac) /*  289 */,
   U64_C(0x1056136d0afbbcc5) /*  290 */, U64_C(0x7889e1dd9a6d0c85) /*  291 */,
   U64_C(0xd33525782a7974aa) /*  292 */, U64_C(0xa7e25d09078ac09b) /*  293 */,
   U64_C(0xbd4138b3eac6edd0) /*  294 */, U64_C(0x920abfbe71eb9e70) /*  295 */,
   U64_C(0xa2a5d0f54fc2625c) /*  296 */, U64_C(0xc054e36b0b1290a3) /*  297 */,
   U64_C(0xf6dd59ff62fe932b) /*  298 */, U64_C(0x3537354511a8ac7d) /*  299 */,
   U64_C(0xca845e9172fadcd4) /*  300 */, U64_C(0x84f82b60329d20dc) /*  301 */,
   U64_C(0x79c62ce1cd672f18) /*  302 */, U64_C(0x8b09a2add124642c) /*  303 */,
   U64_C(0xd0c1e96a19d9e726) /*  304 */, U64_C(0x5a786a9b4ba9500c) /*  305 */,
   U64_C(0x0e020336634c43f3) /*  306 */, U64_C(0xc17b474aeb66d822) /*  307 */,
   U64_C(0x6a731ae3ec9baac2) /*  308 */, U64_C(0x8226667ae0840258) /*  309 */,
   U64_C(0x67d4567691caeca5) /*  310 */, U64_C(0x1d94155c4875adb5) /*  311 */,
   U64_C(0x6d00fd985b813fdf) /*  312 */, U64_C(0x51286efcb774cd06) /*  313 */,
   U64_C(0x5e8834471fa744af) /*  314 */, U64_C(0xf72ca0aee761ae2e) /*  315 */,
   U64_C(0xbe40e4cdaee8e09a) /*  316 */, U64_C(0xe9970bbb5118f665) /*  317 */,
   U64_C(0x726e4beb33df1964) /*  318 */, U64_C(0x703b000729199762) /*  319 */,
   U64_C(0x4631d816f5ef30a7) /*  320 */, U64_C(0xb880b5b51504a6be) /*  321 */,
   U64_C(0x641793c37ed84b6c) /*  322 */, U64_C(0x7b21ed77f6e97d96) /*  323 */,
   U64_C(0x776306312ef96b73) /*  324 */, U64_C(0xae528948e86ff3f4) /*  325 */,
   U64_C(0x53dbd7f286a3f8f8) /*  326 */, U64_C(0x16cadce74cfc1063) /*  327 */,
   U64_C(0x005c19bdfa52c6dd) /*  328 */, U64_C(0x68868f5d64d46ad3) /*  329 */,
   U64_C(0x3a9d512ccf1e186a) /*  330 */, U64_C(0x367e62c2385660ae) /*  331 */,
   U64_C(0xe359e7ea77dcb1d7) /*  332 */, U64_C(0x526c0773749abe6e) /*  333 */,
   U64_C(0x735ae5f9d09f734b) /*  334 */, U64_C(0x493fc7cc8a558ba8) /*  335 */,
   U64_C(0xb0b9c1533041ab45) /*  336 */, U64_C(0x321958ba470a59bd) /*  337 */,
   U64_C(0x852db00b5f46c393) /*  338 */, U64_C(0x91209b2bd336b0e5) /*  339 */,
   U64_C(0x6e604f7d659ef19f) /*  340 */, U64_C(0xb99a8ae2782ccb24) /*  341 */,
   U64_C(0xccf52ab6c814c4c7) /*  342 */, U64_C(0x4727d9afbe11727b) /*  343 */,
   U64_C(0x7e950d0c0121b34d) /*  344 */, U64_C(0x756f435670ad471f) /*  345 */,
   U64_C(0xf5add442615a6849) /*  346 */, U64_C(0x4e87e09980b9957a) /*  347 */,
   U64_C(0x2acfa1df50aee355) /*  348 */, U64_C(0xd898263afd2fd556) /*  349 */,
   U64_C(0xc8f4924dd80c8fd6) /*  350 */, U64_C(0xcf99ca3d754a173a) /*  351 */,
   U64_C(0xfe477bacaf91bf3c) /*  352 */, U64_C(0xed5371f6d690c12d) /*  353 */,
   U64_C(0x831a5c285e687094) /*  354 */, U64_C(0xc5d3c90a3708a0a4) /*  355 */,
   U64_C(0x0f7f903717d06580) /*  356 */, U64_C(0x19f9bb13b8fdf27f) /*  357 */,
   U64_C(0xb1bd6f1b4d502843) /*  358 */, U64_C(0x1c761ba38fff4012) /*  359 */,
   U64_C(0x0d1530c4e2e21f3b) /*  360 */, U64_C(0x8943ce69a7372c8a) /*  361 */,
   U64_C(0xe5184e11feb5ce66) /*  362 */, U64_C(0x618bdb80bd736621) /*  363 */,
   U64_C(0x7d29bad68b574d0b) /*  364 */, U64_C(0x81bb613e25e6fe5b) /*  365 */,
   U64_C(0x071c9c10bc07913f) /*  366 */, U64_C(0xc7beeb7909ac2d97) /*  367 */,
   U64_C(0xc3e58d353bc5d757) /*  368 */, U64_C(0xeb017892f38f61e8) /*  369 */,
   U64_C(0xd4effb9c9b1cc21a) /*  370 */, U64_C(0x99727d26f494f7ab) /*  371 */,
   U64_C(0xa3e063a2956b3e03) /*  372 */, U64_C(0x9d4a8b9a4aa09c30) /*  373 */,
   U64_C(0x3f6ab7d500090fb4) /*  374 */, U64_C(0x9cc0f2a057268ac0) /*  375 */,
   U64_C(0x3dee9d2dedbf42d1) /*  376 */, U64_C(0x330f49c87960a972) /*  377 */,
   U64_C(0xc6b2720287421b41) /*  378 */, U64_C(0x0ac59ec07c00369c) /*  379 */,
   U64_C(0xef4eac49cb353425) /*  380 */, U64_C(0xf450244eef0129d8) /*  381 */,
   U64_C(0x8acc46e5caf4deb6) /*  382 */, U64_C(0x2ffeab63989263f7) /*  383 */,
   U64_C(0x8f7cb9fe5d7a4578) /*  384 */, U64_C(0x5bd8f7644e634635) /*  385 */,
   U64_C(0x427a7315bf2dc900) /*  386 */, U64_C(0x17d0c4aa2125261c) /*  387 */,
   U64_C(0x3992486c93518e50) /*  388 */, U64_C(0xb4cbfee0a2d7d4c3) /*  389 */,
   U64_C(0x7c75d6202c5ddd8d) /*  390 */, U64_C(0xdbc295d8e35b6c61) /*  391 */,
   U64_C(0x60b369d302032b19) /*  392 */, U64_C(0xce42685fdce44132) /*  393 */,
   U64_C(0x06f3ddb9ddf65610) /*  394 */, U64_C(0x8ea4d21db5e148f0) /*  395 */,
   U64_C(0x20b0fce62fcd496f) /*  396 */, U64_C(0x2c1b912358b0ee31) /*  397 */,
   U64_C(0xb28317b818f5a308) /*  398 */, U64_C(0xa89c1e189ca6d2cf) /*  399 */,
   U64_C(0x0c6b18576aaadbc8) /*  400 */, U64_C(0xb65deaa91299fae3) /*  401 */,
   U64_C(0xfb2b794b7f1027e7) /*  402 */, U64_C(0x04e4317f443b5beb) /*  403 */,
   U64_C(0x4b852d325939d0a6) /*  404 */, U64_C(0xd5ae6beefb207ffc) /*  405 */,
   U64_C(0x309682b281c7d374) /*  406 */, U64_C(0xbae309a194c3b475) /*  407 */,
   U64_C(0x8cc3f97b13b49f05) /*  408 */, U64_C(0x98a9422ff8293967) /*  409 */,
   U64_C(0x244b16b01076ff7c) /*  410 */, U64_C(0xf8bf571c663d67ee) /*  411 */,
   U64_C(0x1f0d6758eee30da1) /*  412 */, U64_C(0xc9b611d97adeb9b7) /*  413 */,
   U64_C(0xb7afd5887b6c57a2) /*  414 */, U64_C(0x6290ae846b984fe1) /*  415 */,
   U64_C(0x94df4cdeacc1a5fd) /*  416 */, U64_C(0x058a5bd1c5483aff) /*  417 */,
   U64_C(0x63166cc142ba3c37) /*  418 */, U64_C(0x8db8526eb2f76f40) /*  419 */,
   U64_C(0xe10880036f0d6d4e) /*  420 */, U64_C(0x9e0523c9971d311d) /*  421 */,
   U64_C(0x45ec2824cc7cd691) /*  422 */, U64_C(0x575b8359e62382c9) /*  423 */,
   U64_C(0xfa9e400dc4889995) /*  424 */, U64_C(0xd1823ecb45721568) /*  425 */,
   U64_C(0xdafd983b8206082f) /*  426 */, U64_C(0xaa7d29082386a8cb) /*  427 */,
   U64_C(0x269fcd4403b87588) /*  428 */, U64_C(0x1b91f5f728bdd1e0) /*  429 */,
   U64_C(0xe4669f39040201f6) /*  430 */, U64_C(0x7a1d7c218cf04ade) /*  431 */,
   U64_C(0x65623c29d79ce5ce) /*  432 */, U64_C(0x2368449096c00bb1) /*  433 */,
   U64_C(0xab9bf1879da503ba) /*  434 */, U64_C(0xbc23ecb1a458058e) /*  435 */,
   U64_C(0x9a58df01bb401ecc) /*  436 */, U64_C(0xa070e868a85f143d) /*  437 */,
   U64_C(0x4ff188307df2239e) /*  438 */, U64_C(0x14d565b41a641183) /*  439 */,
   U64_C(0xee13337452701602) /*  440 */, U64_C(0x950e3dcf3f285e09) /*  441 */,
   U64_C(0x59930254b9c80953) /*  442 */, U64_C(0x3bf299408930da6d) /*  443 */,
   U64_C(0xa955943f53691387) /*  444 */, U64_C(0xa15edecaa9cb8784) /*  445 */,
   U64_C(0x29142127352be9a0) /*  446 */, U64_C(0x76f0371fff4e7afb) /*  447 */,
   U64_C(0x0239f450274f2228) /*  448 */, U64_C(0xbb073af01d5e868b) /*  449 */,
   U64_C(0xbfc80571c10e96c1) /*  450 */, U64_C(0xd267088568222e23) /*  451 */,
   U64_C(0x9671a3d48e80b5b0) /*  452 */, U64_C(0x55b5d38ae193bb81) /*  453 */,
   U64_C(0x693ae2d0a18b04b8) /*  454 */, U64_C(0x5c48b4ecadd5335f) /*  455 */,
   U64_C(0xfd743b194916a1ca) /*  456 */, U64_C(0x2577018134be98c4) /*  457 */,
   U64_C(0xe77987e83c54a4ad) /*  458 */, U64_C(0x28e11014da33e1b9) /*  459 */,
   U64_C(0x270cc59e226aa213) /*  460 */, U64_C(0x71495f756d1a5f60) /*  461 */,
   U64_C(0x9be853fb60afef77) /*  462 */, U64_C(0xadc786a7f7443dbf) /*  463 */,
   U64_C(0x0904456173b29a82) /*  464 */, U64_C(0x58bc7a66c232bd5e) /*  465 */,
   U64_C(0xf306558c673ac8b2) /*  466 */, U64_C(0x41f639c6b6c9772a) /*  467 */,
   U64_C(0x216defe99fda35da) /*  468 */, U64_C(0x11640cc71c7be615) /*  469 */,
   U64_C(0x93c43694565c5527) /*  470 */, U64_C(0xea038e6246777839) /*  471 */,
   U64_C(0xf9abf3ce5a3e2469) /*  472 */, U64_C(0x741e768d0fd312d2) /*  473 */,
   U64_C(0x0144b883ced652c6) /*  474 */, U64_C(0xc20b5a5ba33f8552) /*  475 */,
   U64_C(0x1ae69633c3435a9d) /*  476 */, U64_C(0x97a28ca4088cfdec) /*  477 */,
   U64_C(0x8824a43c1e96f420) /*  478 */, U64_C(0x37612fa66eeea746) /*  479 */,
   U64_C(0x6b4cb165f9cf0e5a) /*  480 */, U64_C(0x43aa1c06a0abfb4a) /*  481 */,
   U64_C(0x7f4dc26ff162796b) /*  482 */, U64_C(0x6cbacc8e54ed9b0f) /*  483 */,
   U64_C(0xa6b7ffefd2bb253e) /*  484 */, U64_C(0x2e25bc95b0a29d4f) /*  485 */,
   U64_C(0x86d6a58bdef1388c) /*  486 */, U64_C(0xded74ac576b6f054) /*  487 */,
   U64_C(0x8030bdbc2b45805d) /*  488 */, U64_C(0x3c81af70e94d9289) /*  489 */,
   U64_C(0x3eff6dda9e3100db) /*  490 */, U64_C(0xb38dc39fdfcc8847) /*  491 */,
   U64_C(0x123885528d17b87e) /*  492 */, U64_C(0xf2da0ed240b1b642) /*  493 */,
   U64_C(0x44cefadcd54bf9a9) /*  494 */, U64_C(0x1312200e433c7ee6) /*  495 */,
   U64_C(0x9ffcc84f3a78c748) /*  496 */, U64_C(0xf0cd1f72248576bb) /*  497 */,
   U64_C(0xec6974053638cfe4) /*  498 */, U64_C(0x2ba7b67c0cec4e4c) /*  499 */,
   U64_C(0xac2f4df3e5ce32ed) /*  500 */, U64_C(0xcb33d14326ea4c11) /*  501 */,
   U64_C(0xa4e9044cc77e58bc) /*  502 */, U64_C(0x5f513293d934fcef) /*  503 */,
   U64_C(0x5dc9645506e55444) /*  504 */, U64_C(0x50de418f317de40a) /*  505 */,
   U64_C(0x388cb31a69dde259) /*  506 */, U64_C(0x2db4a83455820a86) /*  507 */,
   U64_C(0x9010a91e84711ae9) /*  508 */, U64_C(0x4df7f0b7b1498371) /*  509 */,
   U64_C(0xd62a2eabc0977179) /*  510 */, U64_C(0x22fac097aa8d5c0e) /*  511 */
 };
 /* Third of the four 256-entry lookup tables used by tiger_round, which
    indexes it with byte 4 ((xc) >> 32) and byte 3 ((xc) >> 24) of the
    round word.  The trailing comments continue a running entry number
    across the tables (512..767 here).  */
 static u64 sbox3[256] = {
   U64_C(0xf49fcc2ff1daf39b) /*  512 */, U64_C(0x487fd5c66ff29281) /*  513 */,
   U64_C(0xe8a30667fcdca83f) /*  514 */, U64_C(0x2c9b4be3d2fcce63) /*  515 */,
   U64_C(0xda3ff74b93fbbbc2) /*  516 */, U64_C(0x2fa165d2fe70ba66) /*  517 */,
   U64_C(0xa103e279970e93d4) /*  518 */, U64_C(0xbecdec77b0e45e71) /*  519 */,
   U64_C(0xcfb41e723985e497) /*  520 */, U64_C(0xb70aaa025ef75017) /*  521 */,
   U64_C(0xd42309f03840b8e0) /*  522 */, U64_C(0x8efc1ad035898579) /*  523 */,
   U64_C(0x96c6920be2b2abc5) /*  524 */, U64_C(0x66af4163375a9172) /*  525 */,
   U64_C(0x2174abdcca7127fb) /*  526 */, U64_C(0xb33ccea64a72ff41) /*  527 */,
   U64_C(0xf04a4933083066a5) /*  528 */, U64_C(0x8d970acdd7289af5) /*  529 */,
   U64_C(0x8f96e8e031c8c25e) /*  530 */, U64_C(0xf3fec02276875d47) /*  531 */,
   U64_C(0xec7bf310056190dd) /*  532 */, U64_C(0xf5adb0aebb0f1491) /*  533 */,
   U64_C(0x9b50f8850fd58892) /*  534 */, U64_C(0x4975488358b74de8) /*  535 */,
   U64_C(0xa3354ff691531c61) /*  536 */, U64_C(0x0702bbe481d2c6ee) /*  537 */,
   U64_C(0x89fb24057deded98) /*  538 */, U64_C(0xac3075138596e902) /*  539 */,
   U64_C(0x1d2d3580172772ed) /*  540 */, U64_C(0xeb738fc28e6bc30d) /*  541 */,
   U64_C(0x5854ef8f63044326) /*  542 */, U64_C(0x9e5c52325add3bbe) /*  543 */,
   U64_C(0x90aa53cf325c4623) /*  544 */, U64_C(0xc1d24d51349dd067) /*  545 */,
   U64_C(0x2051cfeea69ea624) /*  546 */, U64_C(0x13220f0a862e7e4f) /*  547 */,
   U64_C(0xce39399404e04864) /*  548 */, U64_C(0xd9c42ca47086fcb7) /*  549 */,
   U64_C(0x685ad2238a03e7cc) /*  550 */, U64_C(0x066484b2ab2ff1db) /*  551 */,
   U64_C(0xfe9d5d70efbf79ec) /*  552 */, U64_C(0x5b13b9dd9c481854) /*  553 */,
   U64_C(0x15f0d475ed1509ad) /*  554 */, U64_C(0x0bebcd060ec79851) /*  555 */,
   U64_C(0xd58c6791183ab7f8) /*  556 */, U64_C(0xd1187c5052f3eee4) /*  557 */,
   U64_C(0xc95d1192e54e82ff) /*  558 */, U64_C(0x86eea14cb9ac6ca2) /*  559 */,
   U64_C(0x3485beb153677d5d) /*  560 */, U64_C(0xdd191d781f8c492a) /*  561 */,
   U64_C(0xf60866baa784ebf9) /*  562 */, U64_C(0x518f643ba2d08c74) /*  563 */,
   U64_C(0x8852e956e1087c22) /*  564 */, U64_C(0xa768cb8dc410ae8d) /*  565 */,
   U64_C(0x38047726bfec8e1a) /*  566 */, U64_C(0xa67738b4cd3b45aa) /*  567 */,
   U64_C(0xad16691cec0dde19) /*  568 */, U64_C(0xc6d4319380462e07) /*  569 */,
   U64_C(0xc5a5876d0ba61938) /*  570 */, U64_C(0x16b9fa1fa58fd840) /*  571 */,
   U64_C(0x188ab1173ca74f18) /*  572 */, U64_C(0xabda2f98c99c021f) /*  573 */,
   U64_C(0x3e0580ab134ae816) /*  574 */, U64_C(0x5f3b05b773645abb) /*  575 */,
   U64_C(0x2501a2be5575f2f6) /*  576 */, U64_C(0x1b2f74004e7e8ba9) /*  577 */,
   U64_C(0x1cd7580371e8d953) /*  578 */, U64_C(0x7f6ed89562764e30) /*  579 */,
   U64_C(0xb15926ff596f003d) /*  580 */, U64_C(0x9f65293da8c5d6b9) /*  581 */,
   U64_C(0x6ecef04dd690f84c) /*  582 */, U64_C(0x4782275fff33af88) /*  583 */,
   U64_C(0xe41433083f820801) /*  584 */, U64_C(0xfd0dfe409a1af9b5) /*  585 */,
   U64_C(0x4325a3342cdb396b) /*  586 */, U64_C(0x8ae77e62b301b252) /*  587 */,
   U64_C(0xc36f9e9f6655615a) /*  588 */, U64_C(0x85455a2d92d32c09) /*  589 */,
   U64_C(0xf2c7dea949477485) /*  590 */, U64_C(0x63cfb4c133a39eba) /*  591 */,
   U64_C(0x83b040cc6ebc5462) /*  592 */, U64_C(0x3b9454c8fdb326b0) /*  593 */,
   U64_C(0x56f56a9e87ffd78c) /*  594 */, U64_C(0x2dc2940d99f42bc6) /*  595 */,
   U64_C(0x98f7df096b096e2d) /*  596 */, U64_C(0x19a6e01e3ad852bf) /*  597 */,
   U64_C(0x42a99ccbdbd4b40b) /*  598 */, U64_C(0xa59998af45e9c559) /*  599 */,
   U64_C(0x366295e807d93186) /*  600 */, U64_C(0x6b48181bfaa1f773) /*  601 */,
   U64_C(0x1fec57e2157a0a1d) /*  602 */, U64_C(0x4667446af6201ad5) /*  603 */,
   U64_C(0xe615ebcacfb0f075) /*  604 */, U64_C(0xb8f31f4f68290778) /*  605 */,
   U64_C(0x22713ed6ce22d11e) /*  606 */, U64_C(0x3057c1a72ec3c93b) /*  607 */,
   U64_C(0xcb46acc37c3f1f2f) /*  608 */, U64_C(0xdbb893fd02aaf50e) /*  609 */,
   U64_C(0x331fd92e600b9fcf) /*  610 */, U64_C(0xa498f96148ea3ad6) /*  611 */,
   U64_C(0xa8d8426e8b6a83ea) /*  612 */, U64_C(0xa089b274b7735cdc) /*  613 */,
   U64_C(0x87f6b3731e524a11) /*  614 */, U64_C(0x118808e5cbc96749) /*  615 */,
   U64_C(0x9906e4c7b19bd394) /*  616 */, U64_C(0xafed7f7e9b24a20c) /*  617 */,
   U64_C(0x6509eadeeb3644a7) /*  618 */, U64_C(0x6c1ef1d3e8ef0ede) /*  619 */,
   U64_C(0xb9c97d43e9798fb4) /*  620 */, U64_C(0xa2f2d784740c28a3) /*  621 */,
   U64_C(0x7b8496476197566f) /*  622 */, U64_C(0x7a5be3e6b65f069d) /*  623 */,
   U64_C(0xf96330ed78be6f10) /*  624 */, U64_C(0xeee60de77a076a15) /*  625 */,
   U64_C(0x2b4bee4aa08b9bd0) /*  626 */, U64_C(0x6a56a63ec7b8894e) /*  627 */,
   U64_C(0x02121359ba34fef4) /*  628 */, U64_C(0x4cbf99f8283703fc) /*  629 */,
   U64_C(0x398071350caf30c8) /*  630 */, U64_C(0xd0a77a89f017687a) /*  631 */,
   U64_C(0xf1c1a9eb9e423569) /*  632 */, U64_C(0x8c7976282dee8199) /*  633 */,
   U64_C(0x5d1737a5dd1f7abd) /*  634 */, U64_C(0x4f53433c09a9fa80) /*  635 */,
   U64_C(0xfa8b0c53df7ca1d9) /*  636 */, U64_C(0x3fd9dcbc886ccb77) /*  637 */,
   U64_C(0xc040917ca91b4720) /*  638 */, U64_C(0x7dd00142f9d1dcdf) /*  639 */,
   U64_C(0x8476fc1d4f387b58) /*  640 */, U64_C(0x23f8e7c5f3316503) /*  641 */,
   U64_C(0x032a2244e7e37339) /*  642 */, U64_C(0x5c87a5d750f5a74b) /*  643 */,
   U64_C(0x082b4cc43698992e) /*  644 */, U64_C(0xdf917becb858f63c) /*  645 */,
   U64_C(0x3270b8fc5bf86dda) /*  646 */, U64_C(0x10ae72bb29b5dd76) /*  647 */,
   U64_C(0x576ac94e7700362b) /*  648 */, U64_C(0x1ad112dac61efb8f) /*  649 */,
   U64_C(0x691bc30ec5faa427) /*  650 */, U64_C(0xff246311cc327143) /*  651 */,
   U64_C(0x3142368e30e53206) /*  652 */, U64_C(0x71380e31e02ca396) /*  653 */,
   U64_C(0x958d5c960aad76f1) /*  654 */, U64_C(0xf8d6f430c16da536) /*  655 */,
   U64_C(0xc8ffd13f1be7e1d2) /*  656 */, U64_C(0x7578ae66004ddbe1) /*  657 */,
   U64_C(0x05833f01067be646) /*  658 */, U64_C(0xbb34b5ad3bfe586d) /*  659 */,
   U64_C(0x095f34c9a12b97f0) /*  660 */, U64_C(0x247ab64525d60ca8) /*  661 */,
   U64_C(0xdcdbc6f3017477d1) /*  662 */, U64_C(0x4a2e14d4decad24d) /*  663 */,
   U64_C(0xbdb5e6d9be0a1eeb) /*  664 */, U64_C(0x2a7e70f7794301ab) /*  665 */,
   U64_C(0xdef42d8a270540fd) /*  666 */, U64_C(0x01078ec0a34c22c1) /*  667 */,
   U64_C(0xe5de511af4c16387) /*  668 */, U64_C(0x7ebb3a52bd9a330a) /*  669 */,
   U64_C(0x77697857aa7d6435) /*  670 */, U64_C(0x004e831603ae4c32) /*  671 */,
   U64_C(0xe7a21020ad78e312) /*  672 */, U64_C(0x9d41a70c6ab420f2) /*  673 */,
   U64_C(0x28e06c18ea1141e6) /*  674 */, U64_C(0xd2b28cbd984f6b28) /*  675 */,
   U64_C(0x26b75f6c446e9d83) /*  676 */, U64_C(0xba47568c4d418d7f) /*  677 */,
   U64_C(0xd80badbfe6183d8e) /*  678 */, U64_C(0x0e206d7f5f166044) /*  679 */,
   U64_C(0xe258a43911cbca3e) /*  680 */, U64_C(0x723a1746b21dc0bc) /*  681 */,
   U64_C(0xc7caa854f5d7cdd3) /*  682 */, U64_C(0x7cac32883d261d9c) /*  683 */,
   U64_C(0x7690c26423ba942c) /*  684 */, U64_C(0x17e55524478042b8) /*  685 */,
   U64_C(0xe0be477656a2389f) /*  686 */, U64_C(0x4d289b5e67ab2da0) /*  687 */,
   U64_C(0x44862b9c8fbbfd31) /*  688 */, U64_C(0xb47cc8049d141365) /*  689 */,
   U64_C(0x822c1b362b91c793) /*  690 */, U64_C(0x4eb14655fb13dfd8) /*  691 */,
   U64_C(0x1ecbba0714e2a97b) /*  692 */, U64_C(0x6143459d5cde5f14) /*  693 */,
   U64_C(0x53a8fbf1d5f0ac89) /*  694 */, U64_C(0x97ea04d81c5e5b00) /*  695 */,
   U64_C(0x622181a8d4fdb3f3) /*  696 */, U64_C(0xe9bcd341572a1208) /*  697 */,
   U64_C(0x1411258643cce58a) /*  698 */, U64_C(0x9144c5fea4c6e0a4) /*  699 */,
   U64_C(0x0d33d06565cf620f) /*  700 */, U64_C(0x54a48d489f219ca1) /*  701 */,
   U64_C(0xc43e5eac6d63c821) /*  702 */, U64_C(0xa9728b3a72770daf) /*  703 */,
   U64_C(0xd7934e7b20df87ef) /*  704 */, U64_C(0xe35503b61a3e86e5) /*  705 */,
   U64_C(0xcae321fbc819d504) /*  706 */, U64_C(0x129a50b3ac60bfa6) /*  707 */,
   U64_C(0xcd5e68ea7e9fb6c3) /*  708 */, U64_C(0xb01c90199483b1c7) /*  709 */,
   U64_C(0x3de93cd5c295376c) /*  710 */, U64_C(0xaed52edf2ab9ad13) /*  711 */,
   U64_C(0x2e60f512c0a07884) /*  712 */, U64_C(0xbc3d86a3e36210c9) /*  713 */,
   U64_C(0x35269d9b163951ce) /*  714 */, U64_C(0x0c7d6e2ad0cdb5fa) /*  715 */,
   U64_C(0x59e86297d87f5733) /*  716 */, U64_C(0x298ef221898db0e7) /*  717 */,
   U64_C(0x55000029d1a5aa7e) /*  718 */, U64_C(0x8bc08ae1b5061b45) /*  719 */,
   U64_C(0xc2c31c2b6c92703a) /*  720 */, U64_C(0x94cc596baf25ef42) /*  721 */,
   U64_C(0x0a1d73db22540456) /*  722 */, U64_C(0x04b6a0f9d9c4179a) /*  723 */,
   U64_C(0xeffdafa2ae3d3c60) /*  724 */, U64_C(0xf7c8075bb49496c4) /*  725 */,
   U64_C(0x9cc5c7141d1cd4e3) /*  726 */, U64_C(0x78bd1638218e5534) /*  727 */,
   U64_C(0xb2f11568f850246a) /*  728 */, U64_C(0xedfabcfa9502bc29) /*  729 */,
   U64_C(0x796ce5f2da23051b) /*  730 */, U64_C(0xaae128b0dc93537c) /*  731 */,
   U64_C(0x3a493da0ee4b29ae) /*  732 */, U64_C(0xb5df6b2c416895d7) /*  733 */,
   U64_C(0xfcabbd25122d7f37) /*  734 */, U64_C(0x70810b58105dc4b1) /*  735 */,
   U64_C(0xe10fdd37f7882a90) /*  736 */, U64_C(0x524dcab5518a3f5c) /*  737 */,
   U64_C(0x3c9e85878451255b) /*  738 */, U64_C(0x4029828119bd34e2) /*  739 */,
   U64_C(0x74a05b6f5d3ceccb) /*  740 */, U64_C(0xb610021542e13eca) /*  741 */,
   U64_C(0x0ff979d12f59e2ac) /*  742 */, U64_C(0x6037da27e4f9cc50) /*  743 */,
   U64_C(0x5e92975a0df1847d) /*  744 */, U64_C(0xd66de190d3e623fe) /*  745 */,
   U64_C(0x5032d6b87b568048) /*  746 */, U64_C(0x9a36b7ce8235216e) /*  747 */,
   U64_C(0x80272a7a24f64b4a) /*  748 */, U64_C(0x93efed8b8c6916f7) /*  749 */,
   U64_C(0x37ddbff44cce1555) /*  750 */, U64_C(0x4b95db5d4b99bd25) /*  751 */,
   U64_C(0x92d3fda169812fc0) /*  752 */, U64_C(0xfb1a4a9a90660bb6) /*  753 */,
   U64_C(0x730c196946a4b9b2) /*  754 */, U64_C(0x81e289aa7f49da68) /*  755 */,
   U64_C(0x64669a0f83b1a05f) /*  756 */, U64_C(0x27b3ff7d9644f48b) /*  757 */,
   U64_C(0xcc6b615c8db675b3) /*  758 */, U64_C(0x674f20b9bcebbe95) /*  759 */,
   U64_C(0x6f31238275655982) /*  760 */, U64_C(0x5ae488713e45cf05) /*  761 */,
   U64_C(0xbf619f9954c21157) /*  762 */, U64_C(0xeabac46040a8eae9) /*  763 */,
   U64_C(0x454c6fe9f2c0c1cd) /*  764 */, U64_C(0x419cf6496412691c) /*  765 */,
   U64_C(0xd3dc3bef265b0f70) /*  766 */, U64_C(0x6d0e60f5c3578a9e) /*  767 */
 };
 /* Fourth of the four 256-entry lookup tables used by tiger_round, which
    indexes it with byte 6 ((xc) >> 48) and byte 1 ((xc) >> 8) of the
    round word.  The trailing comments continue a running entry number
    across the tables (768..1023 here).  */
 static u64 sbox4[256] = {
   U64_C(0x5b0e608526323c55) /*  768 */, U64_C(0x1a46c1a9fa1b59f5) /*  769 */,
   U64_C(0xa9e245a17c4c8ffa) /*  770 */, U64_C(0x65ca5159db2955d7) /*  771 */,
   U64_C(0x05db0a76ce35afc2) /*  772 */, U64_C(0x81eac77ea9113d45) /*  773 */,
   U64_C(0x528ef88ab6ac0a0d) /*  774 */, U64_C(0xa09ea253597be3ff) /*  775 */,
   U64_C(0x430ddfb3ac48cd56) /*  776 */, U64_C(0xc4b3a67af45ce46f) /*  777 */,
   U64_C(0x4ececfd8fbe2d05e) /*  778 */, U64_C(0x3ef56f10b39935f0) /*  779 */,
   U64_C(0x0b22d6829cd619c6) /*  780 */, U64_C(0x17fd460a74df2069) /*  781 */,
   U64_C(0x6cf8cc8e8510ed40) /*  782 */, U64_C(0xd6c824bf3a6ecaa7) /*  783 */,
   U64_C(0x61243d581a817049) /*  784 */, U64_C(0x048bacb6bbc163a2) /*  785 */,
   U64_C(0xd9a38ac27d44cc32) /*  786 */, U64_C(0x7fddff5baaf410ab) /*  787 */,
   U64_C(0xad6d495aa804824b) /*  788 */, U64_C(0xe1a6a74f2d8c9f94) /*  789 */,
   U64_C(0xd4f7851235dee8e3) /*  790 */, U64_C(0xfd4b7f886540d893) /*  791 */,
   U64_C(0x247c20042aa4bfda) /*  792 */, U64_C(0x096ea1c517d1327c) /*  793 */,
   U64_C(0xd56966b4361a6685) /*  794 */, U64_C(0x277da5c31221057d) /*  795 */,
   U64_C(0x94d59893a43acff7) /*  796 */, U64_C(0x64f0c51ccdc02281) /*  797 */,
   U64_C(0x3d33bcc4ff6189db) /*  798 */, U64_C(0xe005cb184ce66af1) /*  799 */,
   U64_C(0xff5ccd1d1db99bea) /*  800 */, U64_C(0xb0b854a7fe42980f) /*  801 */,
   U64_C(0x7bd46a6a718d4b9f) /*  802 */, U64_C(0xd10fa8cc22a5fd8c) /*  803 */,
   U64_C(0xd31484952be4bd31) /*  804 */, U64_C(0xc7fa975fcb243847) /*  805 */,
   U64_C(0x4886ed1e5846c407) /*  806 */, U64_C(0x28cddb791eb70b04) /*  807 */,
   U64_C(0xc2b00be2f573417f) /*  808 */, U64_C(0x5c9590452180f877) /*  809 */,
   U64_C(0x7a6bddfff370eb00) /*  810 */, U64_C(0xce509e38d6d9d6a4) /*  811 */,
   U64_C(0xebeb0f00647fa702) /*  812 */, U64_C(0x1dcc06cf76606f06) /*  813 */,
   U64_C(0xe4d9f28ba286ff0a) /*  814 */, U64_C(0xd85a305dc918c262) /*  815 */,
   U64_C(0x475b1d8732225f54) /*  816 */, U64_C(0x2d4fb51668ccb5fe) /*  817 */,
   U64_C(0xa679b9d9d72bba20) /*  818 */, U64_C(0x53841c0d912d43a5) /*  819 */,
   U64_C(0x3b7eaa48bf12a4e8) /*  820 */, U64_C(0x781e0e47f22f1ddf) /*  821 */,
   U64_C(0xeff20ce60ab50973) /*  822 */, U64_C(0x20d261d19dffb742) /*  823 */,
   U64_C(0x16a12b03062a2e39) /*  824 */, U64_C(0x1960eb2239650495) /*  825 */,
   U64_C(0x251c16fed50eb8b8) /*  826 */, U64_C(0x9ac0c330f826016e) /*  827 */,
   U64_C(0xed152665953e7671) /*  828 */, U64_C(0x02d63194a6369570) /*  829 */,
   U64_C(0x5074f08394b1c987) /*  830 */, U64_C(0x70ba598c90b25ce1) /*  831 */,
   U64_C(0x794a15810b9742f6) /*  832 */, U64_C(0x0d5925e9fcaf8c6c) /*  833 */,
   U64_C(0x3067716cd868744e) /*  834 */, U64_C(0x910ab077e8d7731b) /*  835 */,
   U64_C(0x6a61bbdb5ac42f61) /*  836 */, U64_C(0x93513efbf0851567) /*  837 */,
   U64_C(0xf494724b9e83e9d5) /*  838 */, U64_C(0xe887e1985c09648d) /*  839 */,
   U64_C(0x34b1d3c675370cfd) /*  840 */, U64_C(0xdc35e433bc0d255d) /*  841 */,
   U64_C(0xd0aab84234131be0) /*  842 */, U64_C(0x08042a50b48b7eaf) /*  843 */,
   U64_C(0x9997c4ee44a3ab35) /*  844 */, U64_C(0x829a7b49201799d0) /*  845 */,
   U64_C(0x263b8307b7c54441) /*  846 */, U64_C(0x752f95f4fd6a6ca6) /*  847 */,
   U64_C(0x927217402c08c6e5) /*  848 */, U64_C(0x2a8ab754a795d9ee) /*  849 */,
   U64_C(0xa442f7552f72943d) /*  850 */, U64_C(0x2c31334e19781208) /*  851 */,
   U64_C(0x4fa98d7ceaee6291) /*  852 */, U64_C(0x55c3862f665db309) /*  853 */,
   U64_C(0xbd0610175d53b1f3) /*  854 */, U64_C(0x46fe6cb840413f27) /*  855 */,
   U64_C(0x3fe03792df0cfa59) /*  856 */, U64_C(0xcfe700372eb85e8f) /*  857 */,
   U64_C(0xa7be29e7adbce118) /*  858 */, U64_C(0xe544ee5cde8431dd) /*  859 */,
   U64_C(0x8a781b1b41f1873e) /*  860 */, U64_C(0xa5c94c78a0d2f0e7) /*  861 */,
   U64_C(0x39412e2877b60728) /*  862 */, U64_C(0xa1265ef3afc9a62c) /*  863 */,
   U64_C(0xbcc2770c6a2506c5) /*  864 */, U64_C(0x3ab66dd5dce1ce12) /*  865 */,
   U64_C(0xe65499d04a675b37) /*  866 */, U64_C(0x7d8f523481bfd216) /*  867 */,
   U64_C(0x0f6f64fcec15f389) /*  868 */, U64_C(0x74efbe618b5b13c8) /*  869 */,
   U64_C(0xacdc82b714273e1d) /*  870 */, U64_C(0xdd40bfe003199d17) /*  871 */,
   U64_C(0x37e99257e7e061f8) /*  872 */, U64_C(0xfa52626904775aaa) /*  873 */,
   U64_C(0x8bbbf63a463d56f9) /*  874 */, U64_C(0xf0013f1543a26e64) /*  875 */,
   U64_C(0xa8307e9f879ec898) /*  876 */, U64_C(0xcc4c27a4150177cc) /*  877 */,
   U64_C(0x1b432f2cca1d3348) /*  878 */, U64_C(0xde1d1f8f9f6fa013) /*  879 */,
   U64_C(0x606602a047a7ddd6) /*  880 */, U64_C(0xd237ab64cc1cb2c7) /*  881 */,
   U64_C(0x9b938e7225fcd1d3) /*  882 */, U64_C(0xec4e03708e0ff476) /*  883 */,
   U64_C(0xfeb2fbda3d03c12d) /*  884 */, U64_C(0xae0bced2ee43889a) /*  885 */,
   U64_C(0x22cb8923ebfb4f43) /*  886 */, U64_C(0x69360d013cf7396d) /*  887 */,
   U64_C(0x855e3602d2d4e022) /*  888 */, U64_C(0x073805bad01f784c) /*  889 */,
   U64_C(0x33e17a133852f546) /*  890 */, U64_C(0xdf4874058ac7b638) /*  891 */,
   U64_C(0xba92b29c678aa14a) /*  892 */, U64_C(0x0ce89fc76cfaadcd) /*  893 */,
   U64_C(0x5f9d4e0908339e34) /*  894 */, U64_C(0xf1afe9291f5923b9) /*  895 */,
   U64_C(0x6e3480f60f4a265f) /*  896 */, U64_C(0xeebf3a2ab29b841c) /*  897 */,
   U64_C(0xe21938a88f91b4ad) /*  898 */, U64_C(0x57dfeff845c6d3c3) /*  899 */,
   U64_C(0x2f006b0bf62caaf2) /*  900 */, U64_C(0x62f479ef6f75ee78) /*  901 */,
   U64_C(0x11a55ad41c8916a9) /*  902 */, U64_C(0xf229d29084fed453) /*  903 */,
   U64_C(0x42f1c27b16b000e6) /*  904 */, U64_C(0x2b1f76749823c074) /*  905 */,
   U64_C(0x4b76eca3c2745360) /*  906 */, U64_C(0x8c98f463b91691bd) /*  907 */,
   U64_C(0x14bcc93cf1ade66a) /*  908 */, U64_C(0x8885213e6d458397) /*  909 */,
   U64_C(0x8e177df0274d4711) /*  910 */, U64_C(0xb49b73b5503f2951) /*  911 */,
   U64_C(0x10168168c3f96b6b) /*  912 */, U64_C(0x0e3d963b63cab0ae) /*  913 */,
   U64_C(0x8dfc4b5655a1db14) /*  914 */, U64_C(0xf789f1356e14de5c) /*  915 */,
   U64_C(0x683e68af4e51dac1) /*  916 */, U64_C(0xc9a84f9d8d4b0fd9) /*  917 */,
   U64_C(0x3691e03f52a0f9d1) /*  918 */, U64_C(0x5ed86e46e1878e80) /*  919 */,
   U64_C(0x3c711a0e99d07150) /*  920 */, U64_C(0x5a0865b20c4e9310) /*  921 */,
   U64_C(0x56fbfc1fe4f0682e) /*  922 */, U64_C(0xea8d5de3105edf9b) /*  923 */,
   U64_C(0x71abfdb12379187a) /*  924 */, U64_C(0x2eb99de1bee77b9c) /*  925 */,
   U64_C(0x21ecc0ea33cf4523) /*  926 */, U64_C(0x59a4d7521805c7a1) /*  927 */,
   U64_C(0x3896f5eb56ae7c72) /*  928 */, U64_C(0xaa638f3db18f75dc) /*  929 */,
   U64_C(0x9f39358dabe9808e) /*  930 */, U64_C(0xb7defa91c00b72ac) /*  931 */,
   U64_C(0x6b5541fd62492d92) /*  932 */, U64_C(0x6dc6dee8f92e4d5b) /*  933 */,
   U64_C(0x353f57abc4beea7e) /*  934 */, U64_C(0x735769d6da5690ce) /*  935 */,
   U64_C(0x0a234aa642391484) /*  936 */, U64_C(0xf6f9508028f80d9d) /*  937 */,
   U64_C(0xb8e319a27ab3f215) /*  938 */, U64_C(0x31ad9c1151341a4d) /*  939 */,
   U64_C(0x773c22a57bef5805) /*  940 */, U64_C(0x45c7561a07968633) /*  941 */,
   U64_C(0xf913da9e249dbe36) /*  942 */, U64_C(0xda652d9b78a64c68) /*  943 */,
   U64_C(0x4c27a97f3bc334ef) /*  944 */, U64_C(0x76621220e66b17f4) /*  945 */,
   U64_C(0x967743899acd7d0b) /*  946 */, U64_C(0xf3ee5bcae0ed6782) /*  947 */,
   U64_C(0x409f753600c879fc) /*  948 */, U64_C(0x06d09a39b5926db6) /*  949 */,
   U64_C(0x6f83aeb0317ac588) /*  950 */, U64_C(0x01e6ca4a86381f21) /*  951 */,
   U64_C(0x66ff3462d19f3025) /*  952 */, U64_C(0x72207c24ddfd3bfb) /*  953 */,
   U64_C(0x4af6b6d3e2ece2eb) /*  954 */, U64_C(0x9c994dbec7ea08de) /*  955 */,
   U64_C(0x49ace597b09a8bc4) /*  956 */, U64_C(0xb38c4766cf0797ba) /*  957 */,
   U64_C(0x131b9373c57c2a75) /*  958 */, U64_C(0xb1822cce61931e58) /*  959 */,
   U64_C(0x9d7555b909ba1c0c) /*  960 */, U64_C(0x127fafdd937d11d2) /*  961 */,
   U64_C(0x29da3badc66d92e4) /*  962 */, U64_C(0xa2c1d57154c2ecbc) /*  963 */,
   U64_C(0x58c5134d82f6fe24) /*  964 */, U64_C(0x1c3ae3515b62274f) /*  965 */,
   U64_C(0xe907c82e01cb8126) /*  966 */, U64_C(0xf8ed091913e37fcb) /*  967 */,
   U64_C(0x3249d8f9c80046c9) /*  968 */, U64_C(0x80cf9bede388fb63) /*  969 */,
   U64_C(0x1881539a116cf19e) /*  970 */, U64_C(0x5103f3f76bd52457) /*  971 */,
   U64_C(0x15b7e6f5ae47f7a8) /*  972 */, U64_C(0xdbd7c6ded47e9ccf) /*  973 */,
   U64_C(0x44e55c410228bb1a) /*  974 */, U64_C(0xb647d4255edb4e99) /*  975 */,
   U64_C(0x5d11882bb8aafc30) /*  976 */, U64_C(0xf5098bbb29d3212a) /*  977 */,
   U64_C(0x8fb5ea14e90296b3) /*  978 */, U64_C(0x677b942157dd025a) /*  979 */,
   U64_C(0xfb58e7c0a390acb5) /*  980 */, U64_C(0x89d3674c83bd4a01) /*  981 */,
   U64_C(0x9e2da4df4bf3b93b) /*  982 */, U64_C(0xfcc41e328cab4829) /*  983 */,
   U64_C(0x03f38c96ba582c52) /*  984 */, U64_C(0xcad1bdbd7fd85db2) /*  985 */,
   U64_C(0xbbb442c16082ae83) /*  986 */, U64_C(0xb95fe86ba5da9ab0) /*  987 */,
   U64_C(0xb22e04673771a93f) /*  988 */, U64_C(0x845358c9493152d8) /*  989 */,
   U64_C(0xbe2a488697b4541e) /*  990 */, U64_C(0x95a2dc2dd38e6966) /*  991 */,
   U64_C(0xc02c11ac923c852b) /*  992 */, U64_C(0x2388b1990df2a87b) /*  993 */,
   U64_C(0x7c8008fa1b4f37be) /*  994 */, U64_C(0x1f70d0c84d54e503) /*  995 */,
   U64_C(0x5490adec7ece57d4) /*  996 */, U64_C(0x002b3c27d9063a3a) /*  997 */,
   U64_C(0x7eaea3848030a2bf) /*  998 */, U64_C(0xc602326ded2003c0) /*  999 */,
   U64_C(0x83a7287d69a94086) /* 1000 */, U64_C(0xc57a5fcb30f57a8a) /* 1001 */,
   U64_C(0xb56844e479ebe779) /* 1002 */, U64_C(0xa373b40f05dcbce9) /* 1003 */,
   U64_C(0xd71a786e88570ee2) /* 1004 */, U64_C(0x879cbacdbde8f6a0) /* 1005 */,
   U64_C(0x976ad1bcc164a32f) /* 1006 */, U64_C(0xab21e25e9666d78b) /* 1007 */,
   U64_C(0x901063aae5e5c33c) /* 1008 */, U64_C(0x9818b34448698d90) /* 1009 */,
   U64_C(0xe36487ae3e1e8abb) /* 1010 */, U64_C(0xafbdf931893bdcb4) /* 1011 */,
   U64_C(0x6345a0dc5fbbd519) /* 1012 */, U64_C(0x8628fe269b9465ca) /* 1013 */,
   U64_C(0x1e5d01603f9c51ec) /* 1014 */, U64_C(0x4de44006a15049b7) /* 1015 */,
   U64_C(0xbf6c70e5f776cbb1) /* 1016 */, U64_C(0x411218f2ef552bed) /* 1017 */,
   U64_C(0xcb0c0708705a36a3) /* 1018 */, U64_C(0xe74d14754f986044) /* 1019 */,
   U64_C(0xcd56d9430ea8280e) /* 1020 */, U64_C(0xc12591d7535f5065) /* 1021 */,
   U64_C(0xc83223f1720aef96) /* 1022 */, U64_C(0xc3a0396f7363a51f) /* 1023 */
 };
 
 static unsigned int
 transform ( void *ctx, const unsigned char *data, size_t nblks );
 
 /* Put the TIGER context CONTEXT into its start state for VARIANT
    (0, 1 or 2; tiger_final picks the pad byte and the digest byte
    order from it): load the three chaining words and reset the block
    buffer bookkeeping.  */
 static void
 do_init (void *context, int variant)
 {
   TIGER_CONTEXT *state = context;
 
   /* Remember the requested flavour and hook up the block function.  */
   state->variant = variant;
   state->bctx.bwrite = transform;
   state->bctx.blocksize_shift = _gcry_ctz(64);
 
   /* Nothing has been buffered or counted yet.  */
   state->bctx.count = 0;
   state->bctx.nblocks_high = 0;
   state->bctx.nblocks = 0;
 
   /* Initial chaining values.  */
   state->c = 0xf096a5b4c3b2e187LL;
   state->b = 0xfedcba9876543210LL;
   state->a = 0x0123456789abcdefLL;
 }
 
 /* Init entry point of _gcry_digest_spec_tiger: set CONTEXT up as
    variant 0.  FLAGS is ignored.  */
 static void
 tiger_init (void *context, unsigned int flags)
 {
   do_init (context, 0);
   (void) flags;
 }
 
 /* Init entry point of _gcry_digest_spec_tiger1: set CONTEXT up as
    variant 1.  FLAGS is ignored.  */
 static void
 tiger1_init (void *context, unsigned int flags)
 {
   do_init (context, 1);
   (void) flags;
 }
 
 /* Init entry point of _gcry_digest_spec_tiger2: set CONTEXT up as
    variant 2.  FLAGS is ignored.  */
 static void
 tiger2_init (void *context, unsigned int flags)
 {
   do_init (context, 2);
   (void) flags;
 }
 
 
 /* One round on the three words XA, XB, XC with message word XX and
    multiplier XMUL (byte 0 is the least significant byte of XC):
      - XC is XORed with XX;
      - XA is decreased by the XOR of four table lookups indexed by
        bytes 0, 2, 4 and 6 of XC (sbox1..sbox4 in that order);
      - XB is increased by the XOR of four lookups indexed by bytes
        1, 3, 5 and 7 of XC (sbox4..sbox1 in that order) and is then
        multiplied by XMUL.
    XA, XB and XC are assigned to and XC is expanded many times, so
    the arguments must be plain lvalues without side effects.  The
    expansion is a bare brace block, not a do/while(0) wrapper.  */
 #define tiger_round(xa, xb, xc, xx, xmul) { \
   xc ^= xx; \
   xa -= (  sbox1[  (xc)        & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \
          ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \
   xb += (  sbox4[ ((xc) >>  8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \
          ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \
   xb *= xmul; }
 
 
 /* One pass: eight tiger_round invocations that consume YX[0] .. YX[7]
    in order, all with multiplier YMUL.  The roles of YA, YB and YC
    rotate by one position from round to round, so after eight rounds
    the rotation has not returned to its starting arrangement.  */
 #define pass(ya, yb, yc, yx, ymul) { \
   tiger_round( ya, yb, yc, yx[0], ymul ); \
   tiger_round( yb, yc, ya, yx[1], ymul ); \
   tiger_round( yc, ya, yb, yx[2], ymul ); \
   tiger_round( ya, yb, yc, yx[3], ymul ); \
   tiger_round( yb, yc, ya, yx[4], ymul ); \
   tiger_round( yc, ya, yb, yx[5], ymul ); \
   tiger_round( ya, yb, yc, yx[6], ymul ); \
   tiger_round( yb, yc, ya, yx[7], ymul ); }
 
 
 /* Rewrite the eight-word array X in place; transform_blk applies this
    between passes.  Every statement reads words that earlier statements
    have already updated (two sweeps over x[0] .. x[7]), so the order of
    the lines must not be changed.  X is expanded many times and must
    be a plain array name.  */
 #define key_schedule(x) { \
   x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \
   x[1] ^= x[0]; \
   x[2] += x[1]; \
   x[3] -= x[2] ^ ((~x[1]) << 19 ); \
   x[4] ^= x[3]; \
   x[5] += x[4]; \
   x[6] -= x[5] ^ ((~x[4]) >> 23 ); \
   x[7] ^= x[6]; \
   x[0] += x[7]; \
   x[1] -= x[0] ^ ((~x[7]) << 19 ); \
   x[2] ^= x[1]; \
   x[3] += x[2]; \
   x[4] -= x[3] ^ ((~x[2]) >> 23 ); \
   x[5] ^= x[4]; \
   x[6] += x[5]; \
   x[7] -= x[6] ^ 0x0123456789abcdefLL; }
 
 
 /****************
  * Compress one message block into the context CTX.  DATA must supply
  * 64 bytes (512 bits); they are fetched with buf_get_le64 as eight
  * 64-bit words.  Returns the stack depth the caller should burn.
  */
 static unsigned int
 transform_blk ( void *ctx, const unsigned char *data )
 {
   TIGER_CONTEXT *hd = ctx;
   u64 w[8];
   u64 a, b, c;
   u64 a0, b0, c0;
   int n;
 
   /* Fetch the eight message words.  */
   n = 0;
   while ( n < 8 )
     {
       w[n] = buf_get_le64 (data + 8 * n);
       n++;
     }
 
   /* Keep the incoming chaining values for the feedforward.  */
   a0 = hd->a;
   b0 = hd->b;
   c0 = hd->c;
   a = a0;
   b = b0;
   c = c0;
 
   /* Three passes with multipliers 5, 7 and 9; the message words are
      rescheduled between passes.  */
   pass( a, b, c, w, 5);
   key_schedule( w );
   pass( c, a, b, w, 7);
   key_schedule( w );
   pass( b, c, a, w, 9);
 
   /* Feedforward, written straight back into the context.  */
   hd->a = a ^ a0;
   hd->b = b - b0;
   hd->c = c + c0;
 
   return /*burn_stack*/ 21*8+11*sizeof(void*);
 }
 
 
 /* Compress NBLKS consecutive 64-byte blocks starting at DATA into the
    context C, one transform_blk call per block.  Returns the stack burn
    depth reported for the last block, or 0 when NBLKS is 0.  */
 static unsigned int
 transform ( void *c, const unsigned char *data, size_t nblks )
 {
   unsigned int burn = 0;
 
   /* Test the count before the first block.  A do/while with --nblks
      would process one block and then wrap the size_t counter when
      NBLKS is 0, reading far beyond DATA.  */
   for (; nblks; nblks--)
     {
       burn = transform_blk (c, data);
       data += 64;
     }
 
   return burn;
 }
 
 
 
 /* Finish the computation for CONTEXT: pad the buffered tail, append
  * the message length in bits, run the last block(s) through transform,
  * and leave the three state words a, b, c (24 bytes) at the start of
  * bctx.buf, where tiger_read hands them out.
  *
  * The pad byte is 0x80 for variant 2 and 0x01 otherwise.  The state
  * words are stored with buf_put_be64 for variant 0 and with
  * buf_put_le64 for the other variants.
  */
 static void
 tiger_final( void *context )
 {
   TIGER_CONTEXT *hd = context;
   u32 t, th, msb, lsb;
   byte *p;
   unsigned int burn;
   byte pad = hd->variant == 2? 0x80 : 0x01;
 
   /* Split the block counter into two 32-bit halves.  If nblocks is
      itself only 32 bits wide the upper half comes from nblocks_high,
      otherwise from the upper bits of nblocks.  */
   t = hd->bctx.nblocks;
   if (sizeof t == sizeof hd->bctx.nblocks)
     th = hd->bctx.nblocks_high;
   else
     th = hd->bctx.nblocks >> 32;
 
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
   msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
   if( (lsb += hd->bctx.count) < t )
     msb++; /* carry out of the low half */
   /* multiply by 8 to make a bit count */
   t = lsb;
   lsb <<= 3;
   msb <<= 3;
   msb |= t >> 29;
 
   /* The length field occupies bytes 56..63 of the final block, so the
      pad byte must land at offset 55 or earlier to fit in this block.  */
   if( hd->bctx.count < 56 )  /* enough room */
     {
       hd->bctx.buf[hd->bctx.count++] = pad;
       if (hd->bctx.count < 56)
 	memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 56, lsb);
       buf_put_le32(hd->bctx.buf + 60, msb);
       burn = transform( hd, hd->bctx.buf, 1 );
     }
   else  /* need one extra block */
     {
       hd->bctx.buf[hd->bctx.count++] = pad; /* pad character */
       /* fill pad and next block with zeroes */
       /* NOTE(review): writes up to offset 64 + 63, so this relies on
          bctx.buf holding at least two blocks (128 bytes) - confirm
          against the block context definition.  */
       memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
 
       /* append the 64 bit count */
       buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
       buf_put_le32(hd->bctx.buf + 64 + 60, msb);
       burn = transform( hd, hd->bctx.buf, 2 );
     }
 
   /* Store the state words back to back at the start of the buffer.
      The macro argument selects the context member (hd->a, hd->b,
      hd->c).  */
   p = hd->bctx.buf;
 #define X(a) do { buf_put_be64(p, hd->a); p += 8; } while(0)
 #define Y(a) do { buf_put_le64(p, hd->a); p += 8; } while(0)
   if (hd->variant == 0)
     {
       X(a);
       X(b);
       X(c);
     }
   else
     {
       Y(a);
       Y(b);
       Y(c);
     }
 #undef X
 #undef Y
 
   hd->bctx.count = 0;
 
   _gcry_burn_stack (burn);
 }
 
 static byte *
 tiger_read( void *context )
 {
   TIGER_CONTEXT *hd = context;
 
   return hd->bctx.buf;
 }
 
 
 
 /* This is the old TIGER variant based on the unfixed reference
    implementation.  It was used in GnuPG up to 1.3.2.  We don't provide
    an OID anymore because that would not be correct.  Its init hook is
    tiger_init, which selects variant 0.  */
 const gcry_md_spec_t _gcry_digest_spec_tiger =
   {
     GCRY_MD_TIGER, {0, 0},
     "TIGER192", NULL, 0, NULL, 24,
     tiger_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
     NULL,
     sizeof (TIGER_CONTEXT)
   };
 
 
 
 /* This is the fixed TIGER implementation.  */
 /* DER header that precedes the 24 digest octets:
      30 29        SEQUENCE, 0x29 = 41 bytes
        30 0d      SEQUENCE, 13 bytes
          06 09 .. OBJECT IDENTIFIER, 9 bytes (da 47 encodes arc 11591)
          05 00    NULL
        04 18      OCTET STRING, 0x18 = 24 bytes (not part of this table)  */
 static const byte asn1[19] = /* Object ID is 1.3.6.1.4.1.11591.12.2 */
   { 0x30, 0x29, 0x30, 0x0d, 0x06, 0x09, 0x2b, 0x06,
     0x01, 0x04, 0x01, 0xda, 0x47, 0x0c, 0x02,
     0x05, 0x00, 0x04, 0x18 };
 
 /* OID strings for the fixed TIGER, referenced from
    _gcry_digest_spec_tiger1.  The list ends with a NULL entry.  */
 static const gcry_md_oid_spec_t oid_spec_tiger1[] =
   {
     /* GNU.digestAlgorithm TIGER */
     { "1.3.6.1.4.1.11591.12.2" },
     { NULL }
   };
 
 /* Descriptor for GCRY_MD_TIGER1, named "TIGER".  It is the only one of
    the three TIGER descriptors here that supplies an ASN.1 header and
    an OID list; its init hook is tiger1_init (variant 1).  */
 const gcry_md_spec_t _gcry_digest_spec_tiger1 =
   {
     GCRY_MD_TIGER1, {0, 0},
     "TIGER", asn1, DIM (asn1), oid_spec_tiger1, 24,
     tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
     NULL,
     sizeof (TIGER_CONTEXT)
   };
 
 
 
 /* This is TIGER2, which uses a changed padding algorithm: tiger_final
    pads with 0x80 instead of 0x01 when the variant is 2.  */
 const gcry_md_spec_t _gcry_digest_spec_tiger2 =
   {
     GCRY_MD_TIGER2, {0, 0},
     "TIGER2", NULL, 0, NULL, 24,
     tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
     NULL,
     sizeof (TIGER_CONTEXT)
   };
diff --git a/cipher/twofish.c b/cipher/twofish.c
index e5eae770..74061913 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -1,1854 +1,1854 @@
 /* Twofish for GPG
  * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc.
  * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
  * 256-bit key length added March 20, 1999
  * Some modifications to reduce the text size by Werner Koch, April, 1998
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  ********************************************************************
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
  * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
  * through http://www.counterpane.com/twofish.html
  *
  * For background information on multiplication in finite fields, used for
  * the matrix operations in the key schedule, see the book _Contemporary
  * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
  * Third Edition.
  *
  * Only the 128- and 256-bit key sizes are supported.  This code is intended
  * for GNU C on a 32-bit system, but it should work almost anywhere.  Loops
  * are unrolled, precomputation tables are used, etc., for maximum speed at
  * some cost in memory consumption. */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h> /* for memcmp() */
 
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "bulkhelp.h"
 
 
 /* Size of one Twofish block in bytes (Twofish is a 128-bit block
  * cipher, see the paper cited in the file header). */
 #define TWOFISH_BLOCKSIZE 16
 
 
 /* USE_AMD64_ASM is defined (to 1) when the AMD64 assembly code is to be
  * used: the target is x86-64 and at least one of the
  * HAVE_COMPATIBLE_GCC_{AMD64,WIN64}_PLATFORM_AS configuration macros is
  * defined.  Otherwise it is left undefined. */
 #undef USE_AMD64_ASM
 #ifdef __x86_64__
 # if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) \
      || defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
 #  define USE_AMD64_ASM 1
 # endif
 #endif
 
 /* USE_ARM_ASM is defined (to 1) when ARM assembly code is to be used.
  * Two targets qualify, each paired with its own configuration macro:
  *   __ARMEL__      with HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
  *   __AARCH64EL__  with HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
  * Otherwise it is left undefined. */
 #undef USE_ARM_ASM
 #if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
 # define USE_ARM_ASM 1
 #elif defined(__AARCH64EL__) \
       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS)
 # define USE_ARM_ASM 1
 #endif
 
 /* USE_AVX2 is defined (to 1) when the AMD64 AVX2 code is to be compiled
  * in.  This needs everything USE_AMD64_ASM needs (an x86-64 target plus
  * one of the HAVE_COMPATIBLE_GCC_{AMD64,WIN64}_PLATFORM_AS macros) and,
  * in addition, ENABLE_AVX2_SUPPORT. */
 #undef USE_AVX2
 #if defined(__x86_64__) && defined(ENABLE_AVX2_SUPPORT) \
     && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) \
         || defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX2 1
 #endif
 
 
 /* Forward declaration of the self-test routine. */
 static const char *selftest (void);
 
 
 /* Forward declarations of the bulk functions.  They are static, so
  * their definitions live further down in this file. */
 static void
 _gcry_twofish_ctr_enc (void *context, unsigned char *ctr,
                        void *outbuf_arg, const void *inbuf_arg,
                        size_t nblocks);
 static void
 _gcry_twofish_cbc_dec (void *context, unsigned char *iv,
                        void *outbuf_arg, const void *inbuf_arg,
                        size_t nblocks);
 static void
 _gcry_twofish_cfb_dec (void *context, unsigned char *iv,
                        void *outbuf_arg, const void *inbuf_arg,
                        size_t nblocks);
 static size_t
 _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c,
                          void *outbuf_arg, const void *inbuf_arg,
                          size_t nblocks, int encrypt);
 static size_t
 _gcry_twofish_ocb_auth (gcry_cipher_hd_t c,
                         const void *abuf_arg, size_t nblocks);
 static void
 _gcry_twofish_xts_crypt (void *context, unsigned char *tweak,
                          void *outbuf_arg, const void *inbuf_arg,
                          size_t nblocks, int encrypt);
 static void
 _gcry_twofish_ecb_crypt (void *context,
                          void *outbuf_arg, const void *inbuf_arg,
                          size_t nblocks, int encrypt);
 
 /* Expanded Twofish key.  The member order and types must stay as they
  * are; only the declaration layout differs from a single combined
  * declarator list. */
 typedef struct {
   /* Key-dependent S-boxes composed with the MDS matrix. */
   u32 s[4][256];
   /* The eight "whitening" subkeys, K[0] through K[7]. */
   u32 w[8];
   /* The remaining "round" subkeys; k[i] corresponds to what the Twofish
    * paper calls K[i+8]. */
   u32 k[32];
 
 #ifdef USE_AVX2
   /* Present only when USE_AVX2 is compiled in.  NOTE(review): presumably
    * a flag telling whether the AVX2 code may be used for this context --
    * confirm where it is set. */
   int use_avx2;
 #endif
 } TWOFISH_context;
 
 
 /* The assembly implementations use the SystemV ABI.  On Win64 that
  * requires an ABI conversion and additional stack to store XMM6-XMM15,
  * so ASM_FUNC_ABI expands to the sysv_abi attribute there and to nothing
  * elsewhere.  The macro is only defined when USE_AVX2 is defined. */
 #undef ASM_FUNC_ABI
 #if defined(USE_AVX2) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
 # define ASM_FUNC_ABI __attribute__((sysv_abi))
 #elif defined(USE_AVX2)
 # define ASM_FUNC_ABI
 #endif
 
 
 /* These two tables are the q0 and q1 permutations, exactly as described in
  * the Twofish paper. */
 
 static const byte q0[256] = {
    0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
    0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
    0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
    0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
    0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
    0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
    0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
    0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
    0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
    0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
    0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
    0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
    0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
    0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
    0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
    0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
    0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
    0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
    0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
    0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
    0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
    0x4A, 0x5E, 0xC1, 0xE0
 };
 
 static const byte q1[256] = {
    0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
    0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
    0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
    0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
    0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
    0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
    0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
    0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
    0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
    0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
    0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
    0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
    0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
    0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
    0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
    0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
    0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
    0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
    0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
    0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
    0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
    0x55, 0x09, 0xBE, 0x91
 };
 
 /* These MDS tables are actually tables of MDS composed with q0 and q1,
  * because it is only ever used that way and we can save some time by
  * precomputing.  Of course the main saving comes from precomputing the
  * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
  * things up in these tables we reduce the matrix multiply to four lookups
  * and three XORs.  Semi-formally, the definition of these tables is:
  * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
  * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
  * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
  * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
  * by Schneier et al, and I'm casually glossing over the byte/word
  * conversion issues. */
 
 static const u32 mds[4][256] = {
    {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
     0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
     0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
     0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
     0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
     0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
     0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
     0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
     0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
     0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
     0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
     0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
     0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
     0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
     0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
     0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
     0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
     0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
     0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
     0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
     0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
     0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
     0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
     0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
     0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
     0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
     0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
     0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
     0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
     0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
     0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
     0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
     0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
     0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
     0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
     0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
     0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
     0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
     0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
     0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
     0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
     0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
     0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
 
    {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
     0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
     0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
     0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
     0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
     0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
     0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
     0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
     0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
     0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
     0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
     0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
     0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
     0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
     0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
     0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
     0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
     0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
     0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
     0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
     0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
     0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
     0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
     0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
     0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
     0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
     0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
     0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
     0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
     0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
     0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
     0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
     0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
     0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
     0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
     0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
     0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
     0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
     0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
     0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
     0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
     0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
     0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
 
    {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
     0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
     0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
     0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
     0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
     0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
     0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
     0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
     0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
     0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
     0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
     0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
     0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
     0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
     0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
     0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
     0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
     0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
     0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
     0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
     0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
     0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
     0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
     0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
     0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
     0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
     0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
     0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
     0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
     0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
     0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
     0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
     0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
     0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
     0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
     0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
     0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
     0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
     0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
     0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
     0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
     0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
     0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
 
    {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
     0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
     0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
     0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
     0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
     0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
     0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
     0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
     0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
     0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
     0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
     0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
     0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
     0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
     0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
     0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
     0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
     0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
     0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
     0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
     0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
     0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
     0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
     0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
     0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
     0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
     0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
     0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
     0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
     0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
     0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
     0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
     0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
     0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
     0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
     0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
     0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
     0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
     0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
     0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
     0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
     0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
     0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
 };
 
 /* The exp_to_poly and poly_to_exp tables are used to perform efficient
  * operations in GF(2^8) represented as GF(2)[x]/w(x) where
  * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
  * definition of the RS matrix in the key schedule.  Elements of that field
  * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
  * which can be represented naturally by bytes (just substitute x=2).  In that
  * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
  * multiplication is inefficient without hardware support.  To multiply
  * faster, I make use of the fact x is a generator for the nonzero elements,
  * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
  * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
  * *not* polynomial notation.  So if I want to compute pq where p and q are
  * in GF(2^8), I can just say:
  *    1. if p=0 or q=0 then pq=0
  *    2. otherwise, find m and n such that p=x^m and q=x^n
  *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
  * The translations in steps 2 and 3 are looked up in the tables
  * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
  * in action, look at the CALC_S macro.  As additional wrinkles, note that
  * one of my operands is always a constant, so the poly_to_exp lookup on it
  * is done in advance; I included the original values in the comments so
  * readers can have some chance of recognizing that this *is* the RS matrix
  * from the Twofish paper.  For nonzero inputs the biggest exponents I'll
  * ever see are 254 (variable) and 237 (constant), so they'll never sum to
  * more than 491.  I'm repeating part of the exp_to_poly table so that I
  * don't have to do mod-255 reduction in the exponent arithmetic.  Since I
  * know my constant operands are never zero, I only have to worry about
  * zero values in the variable operand.  Earlier versions of this code
  * handled that case with a conditional branch; the tables below avoid the
  * branch instead: poly_to_exp[0] holds the out-of-range exponent 492, and
  * exp_to_poly is declared with 256 extra entries beyond index 491 that are
  * left zero-initialized, so every lookup for a zero input yields 0. */
 
 static const u16 poly_to_exp[256] = {
    492,
    0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
    0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
    0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
    0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
    0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
    0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
    0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
    0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
    0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
    0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
    0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
    0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
    0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
    0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
    0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
    0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
    0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
    0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
    0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
    0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
    0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
    0x85, 0xC8, 0xA1
 };
 
 static const byte exp_to_poly[492 + 256] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
    0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
    0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
    0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
    0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
    0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
    0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
    0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
    0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
    0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
    0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
    0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
    0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
    0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
    0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
    0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
    0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
    0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
    0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
    0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
    0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
    0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
    0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
    0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
    0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
    0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
    0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
    0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
    0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
    0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
    0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
    0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
    0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
    0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
    0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
    0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
    0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
    0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
    0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
    0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
    0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB,
 };
 
 
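 /* calc_sb_tbl interleaves the q0 and q1 permutation tables:
  * calc_sb_tbl[2*i] is q0[i] and calc_sb_tbl[2*i + 1] is q1[i].  The
  * entries serve as indices of S-box entries, preprocessed through q0
  * and q1. */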
 static byte calc_sb_tbl[512] = {
     0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
     0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
     0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
     0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
     0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
     0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
     0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
     0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
     0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
     0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
     0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
     0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
     0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
     0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
     0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
     0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
     0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
     0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
     0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
     0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
     0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
     0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
     0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
     0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
     0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
     0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
     0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
     0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
     0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
     0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
     0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
     0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
     0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
     0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
     0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
     0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
     0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
     0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
     0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
     0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
     0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
     0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
     0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
     0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
     0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
     0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
     0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
     0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
     0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
     0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
     0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
     0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
     0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
     0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
     0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
     0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
     0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
     0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
     0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
     0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
     0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
     0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
     0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
     0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
 };
 
 /* Macro to perform one column of the RS matrix multiplication.  The
  * parameters a, b, c, and d are the four bytes of output; i is the index
  * of the key bytes, and w, x, y, and z, are the column of constants from
  * the RS matrix, preprocessed through the poly_to_exp table. */
 
 /* XORs into the four S-vector bytes a..d the entries of exp_to_poly selected
  * by key byte i and the column constants w..z.  Uses `tmp', `key',
  * poly_to_exp and exp_to_poly from the enclosing scope.  The body is wrapped
  * in do/while(0) so that "CALC_S (...);" is exactly one statement and stays
  * correct inside an unbraced if/else. */
 #define CALC_S(a, b, c, d, i, w, x, y, z) \
    do { \
       tmp = poly_to_exp[key[i]]; \
       (a) ^= exp_to_poly[tmp + (w)]; \
       (b) ^= exp_to_poly[tmp + (x)]; \
       (c) ^= exp_to_poly[tmp + (y)]; \
       (d) ^= exp_to_poly[tmp + (z)]; \
    } while (0)
 
 /* Macros to calculate the key-dependent S-boxes for a 128-bit key using
  * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
  * four S-boxes, where i is the index of the entry to compute, and a and b
  * are the index numbers preprocessed through the q0 and q1 tables
  * respectively.  CALC_SB is simply a convenience to make the code shorter;
  * it calls CALC_SB_2 four times with consecutive indices from i to i+3,
  * using the remaining parameters two by two. */
 
 /* Fills entry i of all four ctx->s tables for a 128-bit key.  Uses `ctx',
  * mds, q0, q1 and the S-vector bytes sa..sh from the enclosing scope.
  * Wrapped in do/while(0) so the four assignments form one statement. */
 #define CALC_SB_2(i, a, b) \
    do { \
       ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
       ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
       ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
       ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]; \
    } while (0)
 
 /* Convenience form: runs CALC_SB_2 for the four consecutive entries i..i+3,
  * consuming the remaining arguments in pairs.  Wrapped in do/while(0) so
  * the whole expansion is a single statement. */
 #define CALC_SB(i, a, b, c, d, e, f, g, h) \
    do { \
       CALC_SB_2 (i, a, b); \
       CALC_SB_2 ((i)+1, c, d); \
       CALC_SB_2 ((i)+2, e, f); \
       CALC_SB_2 ((i)+3, g, h); \
    } while (0)
 
 /* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys. */
 
 /* Fills entry i of all four ctx->s tables for a 256-bit key.  Uses `ctx',
  * mds, q0, q1 and the S-vector bytes sa..sp from the enclosing scope.
  * Wrapped in do/while(0) and without a trailing semicolon of its own, so
  * "CALC_SB256_2 (...);" is exactly one statement. */
 #define CALC_SB256_2(i, a, b) \
    do { \
       ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
       ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
       ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
       ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; \
    } while (0)
 
 /* Convenience form: runs CALC_SB256_2 for the four consecutive entries
  * i..i+3, consuming the remaining arguments in pairs.  Wrapped in
  * do/while(0) so the whole expansion is a single statement. */
 #define CALC_SB256(i, a, b, c, d, e, f, g, h) \
    do { \
       CALC_SB256_2 (i, a, b); \
       CALC_SB256_2 ((i)+1, c, d); \
       CALC_SB256_2 ((i)+2, e, f); \
       CALC_SB256_2 ((i)+3, g, h); \
    } while (0)
 
 /* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
  * last two stages of the h() function for a given index (either 2i or 2i+1).
  * a, b, c, and d are the four bytes going into the last two stages.  For
  * 128-bit keys, this is the entire h() function and a and c are the index
  * preprocessed through q0 and q1 respectively; for longer keys they are the
  * output of previous stages.  j is the index of the first key byte to use.
  * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
  * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
  * rotations.  Its parameters are: a, the array to write the results into,
  * j, the index of the first output entry, k and l, the preprocessed indices
  * for index 2i, and m and n, the preprocessed indices for index 2i+1.
  * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two
  * additional lookup-and-XOR stages.  The parameters a and b are the index
  * preprocessed through q0 and q1 respectively; j is the index of the first
  * key byte to use.  CALC_K256 is identical to CALC_K but for using the
  * CALC_K256_2 macro instead of CALC_K_2. */
 
 /* Expression macro: XOR of four mds lookups, each indexed through q0/q1
  * with the input bytes a..d and key bytes starting at offset j.  Uses `key',
  * mds, q0 and q1 from the enclosing scope.  Every parameter and the whole
  * replacement list are parenthesized so that operator arguments and
  * surrounding operators cannot change the grouping. */
 #define CALC_K_2(a, b, c, d, j) \
    (  mds[0][q0[(a) ^ key[(j) + 8]] ^ key[(j)]] \
     ^ mds[1][q0[(b) ^ key[(j) + 9]] ^ key[(j) + 1]] \
     ^ mds[2][q1[(c) ^ key[(j) + 10]] ^ key[(j) + 2]] \
     ^ mds[3][q1[(d) ^ key[(j) + 11]] ^ key[(j) + 3]])
 
 /* Computes one pair of subkeys for a 128-bit key and stores them in
  * ctx->a[j] and ctx->a[j+1], where `a' names the destination array member.
  * Uses the temporaries `x' and `y' and `ctx' from the enclosing scope.
  * Wrapped in do/while(0) so the whole expansion is a single statement. */
 #define CALC_K(a, j, k, l, m, n) \
    do { \
       x = CALC_K_2 (k, l, k, l, 0); \
       y = CALC_K_2 (m, n, m, n, 4); \
       y = (y << 8) + (y >> 24); \
       x += y; y += x; ctx->a[j] = x; \
       ctx->a[(j) + 1] = (y << 9) + (y >> 23); \
    } while (0)
 
 /* 256-bit variant of CALC_K_2: runs a and b through two extra
  * lookup-and-XOR stages (key bytes at j+16..j+19 and j+24..j+27) and feeds
  * the results to CALC_K_2.  The parameters a and b are parenthesized so an
  * operator expression passed as an argument keeps its grouping. */
 #define CALC_K256_2(a, b, j) \
    CALC_K_2 (q0[q1[(b) ^ key[(j) + 24]] ^ key[(j) + 16]], \
 	     q1[q1[(a) ^ key[(j) + 25]] ^ key[(j) + 17]], \
 	     q0[q0[(a) ^ key[(j) + 26]] ^ key[(j) + 18]], \
 	     q1[q0[(b) ^ key[(j) + 27]] ^ key[(j) + 19]], j)
 
 /* Same as CALC_K but built on CALC_K256_2, for 256-bit keys.  Stores the
  * subkey pair in ctx->a[j] and ctx->a[j+1]; uses `x', `y' and `ctx' from
  * the enclosing scope.  Wrapped in do/while(0) so the whole expansion is a
  * single statement. */
 #define CALC_K256(a, j, k, l, m, n) \
    do { \
       x = CALC_K256_2 (k, l, 0); \
       y = CALC_K256_2 (m, n, 4); \
       y = (y << 8) + (y >> 24); \
       x += y; y += x; ctx->a[j] = x; \
       ctx->a[(j) + 1] = (y << 9) + (y >> 23); \
    } while (0)
 
 
 
 /* Perform the key setup.  Note that this works only with 128- and 256-bit
  * keys, despite the API that looks like it might support other sizes.
  *
  * CTX receives the key-dependent S-boxes (ctx->s), the eight whitening
  * subkeys (ctx->w) and the 32 round subkeys (ctx->k).  KEY points to KEYLEN
  * bytes of key material.
  *
  * Returns 0 on success, GPG_ERR_INV_KEYLEN if KEYLEN is neither 16 nor 32,
  * and GPG_ERR_SELFTEST_FAILED if the self-test (run once, on the first
  * call) reported a failure. */
 
 static gcry_err_code_t
 do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen)
 {
   int i, j, k;
 
   /* Temporaries for CALC_K. */
   u32 x, y;
 
   /* The S vector used to key the S-boxes, split up into individual bytes.
    * 128-bit keys use only sa through sh; 256-bit use all of them. */
   byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
   byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
 
   /* Temporary for CALC_S. */
   unsigned int tmp;
 
   /* Flags for self-test.  They are static, so the self-test runs only on
    * the first call and its result is remembered for all later calls. */
   static int initialized = 0;
   static const char *selftest_failed=0;
 
   /* Check key length.  KEYLEN is unsigned, so (keylen - 16) | 16 equals 16
    * only when keylen - 16 is 0 or 16, i.e. for 16- and 32-byte keys. */
   if( ( ( keylen - 16 ) | 16 ) != 16 )
     return GPG_ERR_INV_KEYLEN;
 
   /* Do self-test if necessary. */
   if (!initialized)
     {
       initialized = 1;
       selftest_failed = selftest ();
       if( selftest_failed )
         log_error("%s\n", selftest_failed );
     }
   if( selftest_failed )
     return GPG_ERR_SELFTEST_FAILED;
 
   /* Compute the first two words of the S vector.  The magic numbers are
    * the entries of the RS matrix, preprocessed through poly_to_exp.	The
    * numbers in the comments are the original (polynomial form) matrix
    * entries. */
   CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
   CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
   CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
   CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
   CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
   CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
   CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
   CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
   CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
   CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
   CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
   CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
   CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
   CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
   CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
   CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
 
   if (keylen == 32)  /* 256-bit key */
     {
       /* Calculate the remaining two words of the S vector */
       CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
       CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
       CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
       CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
       CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
       CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
       CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
       CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
       CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
       CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
       CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
       CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
       CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
       CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
       CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
       CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
 
       /* Compute the S-boxes.  j and k walk the even and odd entries of
        * calc_sb_tbl in step with the S-box index i. */
       for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
         {
           CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
 	}
 
       /* Calculate whitening and round subkeys.  The first loop fills
        * ctx->w[0..7]; i is deliberately not reset, so the second loop
        * continues at i == 8 while filling ctx->k[0..31]. */
       for (i = 0; i < 8; i += 2)
 	{
 	  CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
 	}
       for (j = 0; j < 32; j += 2, i += 2)
 	{
 	  CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
 	}
     }
   else
     {
       /* Compute the S-boxes.  j and k walk the even and odd entries of
        * calc_sb_tbl in step with the S-box index i. */
       for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
         {
           CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
         }
 
       /* Calculate whitening and round subkeys.  The first loop fills
        * ctx->w[0..7]; i is deliberately not reset, so the second loop
        * continues at i == 8 while filling ctx->k[0..31]. */
       for (i = 0; i < 8; i += 2)
 	{
 	  CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
 	}
       for (j = 0; j < 32; j += 2, i += 2)
 	{
 	  CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
 	}
     }
 
   return 0;
 }
 
 /* Key-setup entry point: derives the key schedule into CONTEXT, records
  * whether the AVX2 code paths may be used (only when built with USE_AVX2),
  * and fills BULK_OPS with the Twofish bulk routines.  The bulk table is
  * filled in regardless of the key-setup result, which is returned to the
  * caller unchanged. */
 static gcry_err_code_t
 twofish_setkey (void *context, const byte *key, unsigned int keylen,
                 cipher_bulk_ops_t *bulk_ops)
 {
   TWOFISH_context *ctx = context;
   unsigned int hwfeatures = _gcry_get_hw_features ();
   int status = do_twofish_setkey (ctx, key, keylen);
 
 #ifdef USE_AVX2
   /* Both AVX2 and fast VPGATHER must be reported by the CPU. */
   ctx->use_avx2 = ((hwfeatures & HWF_INTEL_AVX2)
                    && (hwfeatures & HWF_INTEL_FAST_VPGATHER)) ? 1 : 0;
 #endif
 
   /* Silence "unused variable" when USE_AVX2 is not defined.  */
   (void)hwfeatures;
 
   /* Start from an all-zero table, then install the bulk routines.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
   bulk_ops->ecb_crypt = _gcry_twofish_ecb_crypt;
   bulk_ops->cbc_dec = _gcry_twofish_cbc_dec;
   bulk_ops->cfb_dec = _gcry_twofish_cfb_dec;
   bulk_ops->ctr_enc = _gcry_twofish_ctr_enc;
   bulk_ops->xts_crypt = _gcry_twofish_xts_crypt;
   bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt;
   bulk_ops->ocb_auth = _gcry_twofish_ocb_auth;
 
   _gcry_burn_stack (23+6*sizeof(void*));
   return status;
 }
 
 
 #ifdef USE_AVX2
 /* Assembler implementations of Twofish using AVX2.  These process 16
    blocks in parallel.  They are declared here and defined outside this
    file; all use the ASM_FUNC_ABI calling convention.
  */
 
 /* Raw 16-block routine; ENCRYPT selects the direction.  */
 extern void _gcry_twofish_avx2_blk16 (const TWOFISH_context *c, byte *out,
 				      const byte *in, int encrypt) ASM_FUNC_ABI;
 
 /* 16-block CTR mode routine.  */
 extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *ctr) ASM_FUNC_ABI;
 
 /* 16-block CBC decryption routine.  */
 extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 /* 16-block CFB decryption routine.  */
 extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *iv) ASM_FUNC_ABI;
 
 /* 16-block OCB routines.  LS holds 16 pointers stored as u64 values.  */
 extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
 				       unsigned char *offset,
 				       unsigned char *checksum,
 				       const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
 					const unsigned char *abuf,
 					unsigned char *offset,
 					unsigned char *checksum,
 					const u64 Ls[16]) ASM_FUNC_ABI;
 #endif
 
 
 #ifdef USE_AMD64_ASM
 
 /* Assembly implementations of Twofish for AMD64, declared here and defined
    outside this file.  The first two handle a single block. */
 extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
 					      byte *out, const byte *in);
 
 extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
 					      byte *out, const byte *in);
 
 /* These assembly implementations process three blocks in parallel.  The
    OCB variants take LS, an array of three pointers stored as u64 values. */
 extern void _gcry_twofish_amd64_blk3(const TWOFISH_context *c, byte *out,
 				     const byte *in, int encrypt);
 
 extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
 					const byte *in, byte *ctr);
 
 extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
 					const byte *in, byte *iv);
 
 extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
 					const byte *in, byte *iv);
 
 extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
 					const byte *in, byte *offset,
 					byte *checksum, const u64 Ls[3]);
 
 extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
 					const byte *in, byte *offset,
 					byte *checksum, const u64 Ls[3]);
 
 extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
 					 const byte *abuf, byte *offset,
 					 byte *checksum, const u64 Ls[3]);
 
 /* Thin inline wrapper: forwards unchanged to the assembly routine
    _gcry_twofish_amd64_encrypt_block.  */
 static inline void
 twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
 {
   _gcry_twofish_amd64_encrypt_block(c, out, in);
 }
 
 /* Thin inline wrapper: forwards unchanged to the assembly routine
    _gcry_twofish_amd64_decrypt_block.  */
 static inline void
 twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
 {
   _gcry_twofish_amd64_decrypt_block(c, out, in);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_ctr_enc.  */
 static inline void
 twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *ctr)
 {
   _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_cbc_dec.  */
 static inline void
 twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *iv)
 {
   _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_cfb_dec.  */
 static inline void
 twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *iv)
 {
   _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_ocb_enc.  */
 static inline void
 twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
 		      byte *offset, byte *checksum, const u64 Ls[3])
 {
   _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_ocb_dec.  */
 static inline void
 twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
 		      byte *offset, byte *checksum, const u64 Ls[3])
 {
   _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
 }
 
 /* Thin inline wrapper: forwards unchanged to the three-block assembly
    routine _gcry_twofish_amd64_ocb_auth.  */
 static inline void
 twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
 		       byte *offset, byte *checksum, const u64 Ls[3])
 {
   _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
 }
 
 #elif defined(USE_ARM_ASM)
 
 /* Assembly implementations of Twofish for ARM, declared here and defined
    outside this file.  Each handles a single block: C is the keyed context,
    IN the input block and OUT the output block. */
 extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
 					      byte *out, const byte *in);
 
 extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
 					      byte *out, const byte *in);
 
 #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 /* Macros to compute the g() function in the encryption and decryption
  * rounds.  G1 is the straight g() function; G2 includes the 8-bit
  * rotation for the high 32-bit word. */
 
 /* Expression macro: XOR of the four ctx->s table entries selected by the
  * bytes of a, lowest byte into table 0.  Uses `ctx' from the enclosing
  * scope.  The whole replacement list is parenthesized so that operators
  * around an invocation cannot bind to just one of the XOR terms. */
 #define G1(a) \
    (  (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
     ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]))
 
 /* Expression macro: like G1 but with the table assignment shifted by one,
  * so the lowest byte of b indexes table 1 and the highest byte table 0.
  * Uses `ctx' from the enclosing scope.  The whole replacement list is
  * parenthesized so that operators around an invocation cannot bind to just
  * one of the XOR terms. */
 #define G2(b) \
    (  (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
     ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]))
 
 /* Encryption and decryption Feistel rounds.  Each one calls the two g()
  * macros, does the PHT, and performs the XOR and the appropriate bit
  * rotations.  The parameters are the round number (used to select subkeys),
  * and the four 32-bit chunks of the text. */
 
 /* One encryption round: n selects the subkey pair ctx->k[2n], ctx->k[2n+1];
  * a and b are read, c and d are updated in place.  Uses the temporaries `x'
  * and `y' and `ctx' from the enclosing scope.  Wrapped in do/while(0) so
  * the whole expansion is a single statement. */
 #define ENCROUND(n, a, b, c, d) \
    do { \
       x = G1 (a); y = G2 (b); \
       x += y; y += x + ctx->k[2 * (n) + 1]; \
       (c) ^= x + ctx->k[2 * (n)]; \
       (c) = ((c) >> 1) + ((c) << 31); \
       (d) = (((d) << 1)+((d) >> 31)) ^ y; \
    } while (0)
 
 /* One decryption round: n selects the subkey pair ctx->k[2n], ctx->k[2n+1];
  * a and b are read, c and d are updated in place.  Uses the temporaries `x'
  * and `y' and `ctx' from the enclosing scope.  Wrapped in do/while(0) so
  * the whole expansion is a single statement. */
 #define DECROUND(n, a, b, c, d) \
    do { \
       x = G1 (a); y = G2 (b); \
       x += y; y += x; \
       (d) ^= y + ctx->k[2 * (n) + 1]; \
       (d) = ((d) >> 1) + ((d) << 31); \
       (c) = (((c) << 1)+((c) >> 31)); \
       (c) ^= (x + ctx->k[2 * (n)]); \
    } while (0)
 
 /* Encryption and decryption cycles; each one is simply two Feistel rounds
  * with the 32-bit chunks re-ordered to simulate the "swap" */
 
 /* Encryption cycle n: rounds 2n and 2n+1 on the locals a, b, c, d, with the
  * word pairs exchanged for the second round.  Wrapped in do/while(0) so the
  * whole expansion is a single statement. */
 #define ENCCYCLE(n) \
    do { \
       ENCROUND (2 * (n), a, b, c, d); \
       ENCROUND (2 * (n) + 1, c, d, a, b); \
    } while (0)
 
 /* Decryption cycle n: rounds 2n+1 and then 2n on the locals a, b, c, d,
  * i.e. the two rounds of ENCCYCLE in reverse order.  Wrapped in
  * do/while(0) so the whole expansion is a single statement. */
 #define DECCYCLE(n) \
    do { \
       DECROUND (2 * (n) + 1, c, d, a, b); \
       DECROUND (2 * (n), a, b, c, d); \
    } while (0)
 
 /* Macros to convert the input and output bytes into 32-bit words,
  * and simultaneously perform the whitening step.  INPACK packs word
  * number n into the variable named by x, using whitening subkey number m.
  * OUTUNPACK unpacks word number n from the variable named by x, using
  * whitening subkey number m. */
 
 /* Loads 32-bit word number n of `in' via buf_get_le32 into x and XORs it
  * with whitening subkey ctx->w[m].  Uses `in' and `ctx' from the enclosing
  * scope.  Wrapped in do/while(0) so the whole expansion is a single
  * statement. */
 #define INPACK(n, x, m) \
    do { \
       x = buf_get_le32(in + (n) * 4); \
       x ^= ctx->w[m]; \
    } while (0)
 
 /* XORs x with whitening subkey ctx->w[m] and stores it via buf_put_le32 as
  * 32-bit word number n of `out'.  Uses `out' and `ctx' from the enclosing
  * scope.  Wrapped in do/while(0) so the whole expansion is a single
  * statement. */
 #define OUTUNPACK(n, x, m) \
    do { \
       x ^= ctx->w[m]; \
       buf_put_le32(out + (n) * 4, x); \
    } while (0)
 
 #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 
 /* Encrypt one block.  in and out may be the same. */
 
 #ifdef USE_AMD64_ASM
 
 /* AMD64 build: encrypt one block with the assembly routine and return the
    stack depth the caller should burn.  */
 static unsigned int
 twofish_encrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   twofish_amd64_encrypt_block (keyed, out, in);
 
   /* burn_stack */
   return 4 * sizeof (void *);
 }
 
 #elif defined(USE_ARM_ASM)
 
 /* ARM build: encrypt one block with the assembly routine and return the
    stack depth the caller should burn.  */
 static unsigned int
 twofish_encrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   _gcry_twofish_arm_encrypt_block (keyed, out, in);
 
   /* burn_stack */
   return 4 * sizeof (void *);
 }
 
 #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 /* Generic C encryption of one block.  Reads the block from IN, applies the
    input whitening (ctx->w[0..3]), eight encryption cycles and the output
    whitening (ctx->w[4..7]), and writes the result to OUT.  The local names
    a, b, c, d, x, y, ctx, in and out are referenced directly by the INPACK,
    ENCCYCLE and OUTUNPACK macros and must not be renamed.  */
 static void
 do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
 {
   /* The four 32-bit chunks of the text. */
   u32 a, b, c, d;
 
   /* Temporaries used by the round function. */
   u32 x, y;
 
   /* Input whitening and packing. */
   INPACK (0, a, 0);
   INPACK (1, b, 1);
   INPACK (2, c, 2);
   INPACK (3, d, 3);
 
   /* Encryption Feistel cycles. */
   ENCCYCLE (0);
   ENCCYCLE (1);
   ENCCYCLE (2);
   ENCCYCLE (3);
   ENCCYCLE (4);
   ENCCYCLE (5);
   ENCCYCLE (6);
   ENCCYCLE (7);
 
   /* Output whitening and unpacking.  The words are written in the order
      c, d, a, b.  */
   OUTUNPACK (0, c, 4);
   OUTUNPACK (1, d, 5);
   OUTUNPACK (2, a, 6);
   OUTUNPACK (3, b, 7);
 }
 
 /* Generic build: encrypt one block with the C implementation and return
    the stack depth the caller should burn.  */
 static unsigned int
 twofish_encrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   do_twofish_encrypt (keyed, out, in);
 
   /* burn_stack */
   return 24 + 3 * sizeof (void *);
 }
 
 #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 
 /* Decrypt one block.  in and out may be the same. */
 
 #ifdef USE_AMD64_ASM
 
 /* AMD64 build: decrypt one block with the assembly routine and return the
    stack depth the caller should burn.  */
 static unsigned int
 twofish_decrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   twofish_amd64_decrypt_block (keyed, out, in);
 
   /* burn_stack */
   return 4 * sizeof (void *);
 }
 
 #elif defined(USE_ARM_ASM)
 
 /* ARM build: decrypt one block with the assembly routine and return the
    stack depth the caller should burn.  */
 static unsigned int
 twofish_decrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   _gcry_twofish_arm_decrypt_block (keyed, out, in);
 
   /* burn_stack */
   return 4 * sizeof (void *);
 }
 
 #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 /* Generic C decryption of one block.  Reads the block from IN, undoes the
    output whitening (ctx->w[4..7]), runs the eight cycles in reverse order
    and undoes the input whitening (ctx->w[0..3]), writing the result to OUT.
    The local names a, b, c, d, x, y, ctx, in and out are referenced directly
    by the INPACK, DECCYCLE and OUTUNPACK macros and must not be renamed.  */
 static void
 do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
 {
   /* The four 32-bit chunks of the text. */
   u32 a, b, c, d;
 
   /* Temporaries used by the round function. */
   u32 x, y;
 
   /* Input whitening and packing.  The words are read in the order
      c, d, a, b, mirroring the output step of encryption.  */
   INPACK (0, c, 4);
   INPACK (1, d, 5);
   INPACK (2, a, 6);
   INPACK (3, b, 7);
 
   /* Decryption Feistel cycles, from cycle 7 down to cycle 0. */
   DECCYCLE (7);
   DECCYCLE (6);
   DECCYCLE (5);
   DECCYCLE (4);
   DECCYCLE (3);
   DECCYCLE (2);
   DECCYCLE (1);
   DECCYCLE (0);
 
   /* Output whitening and unpacking. */
   OUTUNPACK (0, a, 0);
   OUTUNPACK (1, b, 1);
   OUTUNPACK (2, c, 2);
   OUTUNPACK (3, d, 3);
 }
 
 /* Generic build: decrypt one block with the C implementation and return
    the stack depth the caller should burn.  */
 static unsigned int
 twofish_decrypt (void *context, byte *out, const byte *in)
 {
   const TWOFISH_context *keyed = context;
 
   do_twofish_decrypt (keyed, out, in);
 
   /* burn_stack */
   return 24 + 3 * sizeof (void *);
 }
 
 #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 
 
 
 /* CTR mode bulk routine for NBLOCKS complete blocks, meant for the bulk
    encryption feature of cipher.c.  CTR must be TWOFISH_BLOCKSIZE bytes and
    is advanced as blocks are processed.  Widest available code path first:
    16-block AVX2 chunks, then 3-block AMD64 chunks, then one block at a
    time. */
 static void
 _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
 		      const void *inbuf_arg, size_t nblocks)
 {
   TWOFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char keystream[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       /* 16-block chunks.  The twofish-avx2 assembly code does not use
          stack, so this path leaves the burn depth untouched. */
       for (; nblocks >= 16; nblocks -= 16)
         {
           _gcry_twofish_avx2_ctr_enc(ctx, dst, src, ctr);
 
           dst += 16 * TWOFISH_BLOCKSIZE;
           src += 16 * TWOFISH_BLOCKSIZE;
         }
     }
 #endif
 
 #ifdef USE_AMD64_ASM
   /* 3-block chunks. */
   for (; nblocks >= 3; nblocks -= 3)
     {
       twofish_amd64_ctr_enc(ctx, dst, src, ctr);
 
       dst += 3 * TWOFISH_BLOCKSIZE;
       src += 3 * TWOFISH_BLOCKSIZE;
 
       burn = 8 * sizeof(void*);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
     }
 
   /* TODO: use caching instead? */
 #endif
 
   /* Whatever is left goes through the single-block code. */
   while (nblocks)
     {
       /* Encrypt the counter. */
       burn = twofish_encrypt(ctx, keystream, ctr);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
 
       /* XOR the input with the encrypted counter and store in output.  */
       cipher_block_xor(dst, keystream, src, TWOFISH_BLOCKSIZE);
       dst += TWOFISH_BLOCKSIZE;
       src += TWOFISH_BLOCKSIZE;
 
       /* Increment the counter.  */
       cipher_block_add(ctr, 1, TWOFISH_BLOCKSIZE);
 
       nblocks--;
     }
 
   wipememory(keystream, sizeof(keystream));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* CBC mode bulk decryption of NBLOCKS complete blocks, meant for the bulk
    encryption feature of cipher.c.  IV is passed to every code path and is
    updated by them.  Widest available code path first: 16-block AVX2
    chunks, then 3-block AMD64 chunks, then one block at a time. */
 static void
 _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
 		      const void *inbuf_arg, size_t nblocks)
 {
   TWOFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned char plain[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       /* 16-block chunks.  The twofish-avx2 assembly code does not use
          stack, so this path leaves the burn depth untouched. */
       for (; nblocks >= 16; nblocks -= 16)
         {
           _gcry_twofish_avx2_cbc_dec(ctx, dst, src, iv);
 
           dst += 16 * TWOFISH_BLOCKSIZE;
           src += 16 * TWOFISH_BLOCKSIZE;
         }
     }
 #endif
 
 #ifdef USE_AMD64_ASM
   /* 3-block chunks. */
   for (; nblocks >= 3; nblocks -= 3)
     {
       twofish_amd64_cbc_dec(ctx, dst, src, iv);
 
       dst += 3 * TWOFISH_BLOCKSIZE;
       src += 3 * TWOFISH_BLOCKSIZE;
 
       burn = 9 * sizeof(void*);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
     }
 #endif
 
   /* Whatever is left goes through the single-block code. */
   while (nblocks)
     {
       /* The input block is needed later and may be identical to the output
          block, so the raw decryption goes into a scratch buffer first.  */
       burn = twofish_decrypt (ctx, plain, src);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
 
       cipher_block_xor_n_copy_2(dst, plain, iv, src, TWOFISH_BLOCKSIZE);
       src += TWOFISH_BLOCKSIZE;
       dst += TWOFISH_BLOCKSIZE;
 
       nblocks--;
     }
 
   wipememory(plain, sizeof(plain));
   _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* CFB mode bulk decryption of NBLOCKS complete blocks, meant for the bulk
    encryption feature of cipher.c.  IV is passed to every code path and is
    updated by them.  Widest available code path first: 16-block AVX2
    chunks, then 3-block AMD64 chunks, then one block at a time. */
 static void
 _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
 		    const void *inbuf_arg, size_t nblocks)
 {
   TWOFISH_context *ctx = context;
   unsigned char *dst = outbuf_arg;
   const unsigned char *src = inbuf_arg;
   unsigned int burn, burn_stack_depth = 0;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       /* 16-block chunks.  The twofish-avx2 assembly code does not use
          stack, so this path leaves the burn depth untouched. */
       for (; nblocks >= 16; nblocks -= 16)
         {
           _gcry_twofish_avx2_cfb_dec(ctx, dst, src, iv);
 
           dst += 16 * TWOFISH_BLOCKSIZE;
           src += 16 * TWOFISH_BLOCKSIZE;
         }
     }
 #endif
 
 #ifdef USE_AMD64_ASM
   /* 3-block chunks. */
   for (; nblocks >= 3; nblocks -= 3)
     {
       twofish_amd64_cfb_dec(ctx, dst, src, iv);
 
       dst += 3 * TWOFISH_BLOCKSIZE;
       src += 3 * TWOFISH_BLOCKSIZE;
 
       burn = 8 * sizeof(void*);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
     }
 #endif
 
   /* Whatever is left goes through the single-block code: the IV is
      encrypted in place and then combined with the input block.  */
   while (nblocks)
     {
       burn = twofish_encrypt(ctx, iv, iv);
       burn_stack_depth = (burn > burn_stack_depth) ? burn : burn_stack_depth;
 
       cipher_block_xor_n_copy(dst, iv, src, TWOFISH_BLOCKSIZE);
       dst += TWOFISH_BLOCKSIZE;
       src += TWOFISH_BLOCKSIZE;
 
       nblocks--;
     }
 
   _gcry_burn_stack(burn_stack_depth);
 }
 
 /* Bulk encryption/decryption of complete blocks in OCB mode.
 
    Only implemented when USE_AMD64_ASM is defined: blocks are consumed in
    16-block AVX2 chunks (if enabled) and then in 3-block chunks, and the
    block counter c->u_mode.ocb.data_nblocks is advanced accordingly.
    ENCRYPT selects encryption (non-zero) or decryption (zero).
 
    Returns the number of blocks that were NOT processed; without
    USE_AMD64_ASM this is NBLOCKS unchanged.  */
 static size_t
 _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 			const void *inbuf_arg, size_t nblocks, int encrypt)
 {
 #ifdef USE_AMD64_ASM
   TWOFISH_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
       u64 *l;
 
       if (nblocks >= 16)
 	{
           /* Fill Ls once; L points at the one slot that has to be
              refreshed for every 16-block chunk.  */
           l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
 	      blkn += 16;
 	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      if (encrypt)
 		_gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
 					  c->u_ctr.ctr, Ls);
 	      else
 		_gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
 					  c->u_ctr.ctr, Ls);
 
 	      nblocks -= 16;
 	      outbuf += 16 * TWOFISH_BLOCKSIZE;
 	      inbuf  += 16 * TWOFISH_BLOCKSIZE;
 	      did_use_avx2 = 1;
 	    }
 	}
 
       if (did_use_avx2)
 	{
 	  /* twofish-avx2 assembly code does not use stack */
 	  if (nblocks == 0)
 	    burn_stack_depth = 0;
 	}
     }
 #endif
 
   {
     /* Use u64 to store pointers for x32 support (assembly function
      * assumes 64-bit pointers). */
     u64 Ls[3];
 
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
 	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
 	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
 	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	if (encrypt)
 	  twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
 				Ls);
 	else
 	  twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
 				Ls);
 
 	nblocks -= 3;
 	outbuf += 3 * TWOFISH_BLOCKSIZE;
 	inbuf  += 3 * TWOFISH_BLOCKSIZE;
 
 	burn = 8 * sizeof(void*);
 	if (burn > burn_stack_depth)
 	  burn_stack_depth = burn;
       }
 
     /* Fewer than three blocks remain here; they are left unprocessed and
        reported to the caller through the return value.  */
   }
 
   /* Publish the advanced block counter.  */
   c->u_mode.ocb.data_nblocks = blkn;
 
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else
   /* No bulk implementation in this build: nothing is consumed.  */
   (void)c;
   (void)outbuf_arg;
   (void)inbuf_arg;
   (void)encrypt;
 #endif
 
   return nblocks;
 }
 
 /* Bulk authentication of complete blocks in OCB mode.
 
    Only implemented when USE_AMD64_ASM is defined: blocks of ABUF_ARG are
    consumed in 16-block AVX2 chunks (if enabled) and then in 3-block chunks,
    and the block counter c->u_mode.ocb.aad_nblocks is advanced accordingly.
 
    Returns the number of blocks that were NOT processed; without
    USE_AMD64_ASM this is NBLOCKS unchanged.  */
 static size_t
 _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 			size_t nblocks)
 {
 #ifdef USE_AMD64_ASM
   TWOFISH_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
       u64 *l;
 
       if (nblocks >= 16)
 	{
           /* Fill Ls once; L points at the one slot that has to be
              refreshed for every 16-block chunk.  */
           l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
 	      blkn += 16;
 	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
 
 	      nblocks -= 16;
 	      abuf += 16 * TWOFISH_BLOCKSIZE;
 	      did_use_avx2 = 1;
 	    }
 	}
 
       if (did_use_avx2)
 	{
 	  /* twofish-avx2 assembly code does not use stack */
 	  if (nblocks == 0)
 	    burn_stack_depth = 0;
 	}
 
       /* Remaining blocks fall through to the 3-block code below.  */
     }
 #endif
 
   {
     /* Use u64 to store pointers for x32 support (assembly function
      * assumes 64-bit pointers). */
     u64 Ls[3];
 
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
 	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
 	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
 	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 			       c->u_mode.ocb.aad_sum, Ls);
 
 	nblocks -= 3;
 	abuf += 3 * TWOFISH_BLOCKSIZE;
 
 	burn = 8 * sizeof(void*);
 	if (burn > burn_stack_depth)
 	  burn_stack_depth = burn;
       }
 
     /* Fewer than three blocks remain here; they are left unprocessed and
        reported to the caller through the return value.  */
   }
 
   /* Publish the advanced block counter.  */
   c->u_mode.ocb.aad_nblocks = blkn;
 
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else
   /* No bulk implementation in this build: nothing is consumed.  */
   (void)c;
   (void)abuf_arg;
 #endif
 
   return nblocks;
 }
 
 
 /* Encrypt (ENCRYPT non-zero) or decrypt NUM_BLKS blocks from IN to OUT.
    A request of exactly 16 blocks goes to the AVX2 routine when that is
    enabled; otherwise blocks are handled three at a time on AMD64 and then
    singly.  Returns the stack depth the caller should burn (0 for the AVX2
    path).  */
 static unsigned int
 twofish_crypt_blk1_16(void *context, byte *out, const byte *in,
 		      size_t num_blks, int encrypt)
 {
   TWOFISH_context *ctx = context;
   unsigned int burn, burn_stack_depth = 0;
 
 #ifdef USE_AVX2
   if (ctx->use_avx2 && num_blks == 16)
     {
       _gcry_twofish_avx2_blk16 (ctx, out, in, encrypt);
       return 0;
     }
 #endif
 
 #ifdef USE_AMD64_ASM
   for (; num_blks >= 3; num_blks -= 3)
     {
       _gcry_twofish_amd64_blk3 (ctx, out, in, encrypt);
 
       burn = 8 * sizeof(void *);
       if (burn > burn_stack_depth)
 	burn_stack_depth = burn;
 
       out += 3 * TWOFISH_BLOCKSIZE;
       in += 3 * TWOFISH_BLOCKSIZE;
     }
 #endif
 
   for (; num_blks >= 1; num_blks--)
     {
       burn = encrypt ? twofish_encrypt((void *)ctx, out, in)
 		     : twofish_decrypt((void *)ctx, out, in);
       if (burn > burn_stack_depth)
 	burn_stack_depth = burn;
 
       out += TWOFISH_BLOCKSIZE;
       in += TWOFISH_BLOCKSIZE;
     }
 
   return burn_stack_depth;
 }
 
 /* Encryption-only front end to twofish_crypt_blk1_16: forwards all
  * arguments unchanged and selects the encrypt direction.  Returns
  * whatever twofish_crypt_blk1_16 returns. */
 static unsigned int
 twofish_encrypt_blk1_16(void *ctx, byte *out, const byte *in,
 			size_t num_blks)
 {
   const int do_encrypt = 1;
 
   return twofish_crypt_blk1_16 (ctx, out, in, num_blks, do_encrypt);
 }
 
 /* Decryption-only front end to twofish_crypt_blk1_16: forwards all
  * arguments unchanged and selects the decrypt direction.  Returns
  * whatever twofish_crypt_blk1_16 returns. */
 static unsigned int
 twofish_decrypt_blk1_16(void *ctx, byte *out, const byte *in,
 			size_t num_blks)
 {
   const int do_encrypt = 0;
 
   return twofish_crypt_blk1_16 (ctx, out, in, num_blks, do_encrypt);
 }
 
 
 /* Bulk encryption/decryption of complete blocks in XTS mode. */
 static void
 _gcry_twofish_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
 			 const void *inbuf_arg, size_t nblocks, int encrypt)
 {
   TWOFISH_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   int burn_stack_depth = 0;
 
   /* Process remaining blocks. */
   if (nblocks)
     {
       unsigned char tmpbuf[16 * 16];
       unsigned int tmp_used = 16;
       size_t tmpbufsize = 15 * 16;
       size_t nburn;
 
 #ifdef USE_AVX2
       if (ctx->use_avx2)
 	tmpbufsize = 16 * 16;
 #endif
 
       nburn = bulk_xts_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
                                               : twofish_decrypt_blk1_16,
                                  outbuf, inbuf, nblocks,
                                  tweak, tmpbuf, tmpbufsize / 16,
                                  &tmp_used);
       burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
 
       wipememory(tmpbuf, tmp_used);
     }
 
   if (burn_stack_depth)
     _gcry_burn_stack(burn_stack_depth);
 }
 
 
 /* Bulk encryption/decryption in ECB mode. */
 static void
 _gcry_twofish_ecb_crypt (void *context, void *outbuf_arg, const void *inbuf_arg,
 			 size_t nblocks, int encrypt)
 {
   TWOFISH_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   int burn_stack_depth = 0;
 
   /* Process remaining blocks. */
   if (nblocks)
     {
       size_t fn_maxblocks = 15;
       size_t nburn;
 
 #ifdef USE_AVX2
       if (ctx->use_avx2)
 	fn_maxblocks = 16;
 #endif
 
       nburn = bulk_ecb_crypt_128(ctx, encrypt ? twofish_encrypt_blk1_16
                                               : twofish_decrypt_blk1_16,
                                  outbuf, inbuf, nblocks, fn_maxblocks);
       burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
     }
 
   if (burn_stack_depth)
     _gcry_burn_stack(burn_stack_depth);
 }
 
 
 
 /* Test a single encryption and decryption with each key size. */
 
 /* Known-answer self test.  For a 128-bit and a 256-bit key the key is
  * set, one block is encrypted and compared with the expected ciphertext,
  * and the result is decrypted in place and compared with the plaintext.
  * Returns NULL if everything matched, otherwise a static string naming
  * the first step that failed. */
 static const char*
 selftest (void)
 {
   TWOFISH_context ctx; /* Expanded key. */
   byte scratch[16];    /* Encryption/decryption result buffer. */
   cipher_bulk_ops_t bulk_ops;
   unsigned int idx;
 
   /* The vectors come from the "known answer test" of the Twofish paper
    * (I=3 for 128-bit, I=4 for 256-bit).  The all-0 vectors of the
    * "intermediate value test" are deliberately not used: an all-0 key
    * would trigger all the special cases in the RS matrix multiply and
    * leave the math untested. */
   static byte plaintext[16] = {
     0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
     0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
   };
   static byte key[16] = {
     0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
     0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A
   };
   static const byte ciphertext[16] = {
     0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
     0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
   };
   static byte plaintext_256[16] = {
     0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
     0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
   };
   static byte key_256[32] = {
     0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
     0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
     0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
     0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
   };
   static const byte ciphertext_256[16] = {
     0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
     0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
   };
 
   /* One row per key size, checked in this order. */
   static const struct
   {
     byte *key;
     size_t keylen;
     byte *plain;
     const byte *cipher;
     const char *encrypt_errtxt;
     const char *decrypt_errtxt;
   } kat[2] = {
     { key, sizeof (key), plaintext, ciphertext,
       "Twofish-128 test encryption failed.",
       "Twofish-128 test decryption failed." },
     { key_256, sizeof (key_256), plaintext_256, ciphertext_256,
       "Twofish-256 test encryption failed.",
       "Twofish-256 test decryption failed." }
   };
 
   for (idx = 0; idx < 2; idx++)
     {
       twofish_setkey (&ctx, kat[idx].key, kat[idx].keylen, &bulk_ops);
 
       twofish_encrypt (&ctx, scratch, kat[idx].plain);
       if (memcmp (scratch, kat[idx].cipher, sizeof (scratch)) != 0)
 	return kat[idx].encrypt_errtxt;
 
       /* Decrypt in place and expect the original plaintext back. */
       twofish_decrypt (&ctx, scratch, scratch);
       if (memcmp (scratch, kat[idx].plain, sizeof (scratch)) != 0)
 	return kat[idx].decrypt_errtxt;
     }
 
   return NULL;
 }
 
 /* More complete test program.	This does 1000 encryptions and decryptions
  * with each of 250 128-bit keys and 2000 encryptions and decryptions with
  * each of 125 256-bit keys, using a feedback scheme similar to a Feistel
  * cipher, so as to be sure of testing all the table entries pretty
  * thoroughly.	We keep changing the keys so as to get a more meaningful
  * performance number, since the key setup is non-trivial for Twofish. */
 
 #ifdef TEST
 
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
 
 /* Stand-alone test driver (only compiled when TEST is defined).
  *
  * Runs the chained encryption test followed by the matching decryption
  * test, comparing the four 16-byte buffers against the expected values
  * after each phase, and prints the two verdicts plus the elapsed CPU
  * time.  Always returns 0; failures are only reported in the output.
  *
  * The status lines are written with fputs rather than being passed to
  * printf as the format argument, so no non-literal format string is
  * used. */
 int
 main (void)
 {
   TWOFISH_context ctx;     /* Expanded key. */
   int i, j;                /* Loop counters. */
   cipher_bulk_ops_t bulk_ops;
 
   const char *encrypt_msg; /* Message to print regarding encryption test;
                             * the output is done outside the loop to avoid
                             * stuffing up the timing. */
   clock_t timer; /* For computing elapsed time. */
 
   /* Test buffer. */
   byte buffer[4][16] = {
     {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
     {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
      0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
     {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
     {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
      0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
   };
 
   /* Expected outputs for the million-operation test */
   static const byte test_encrypt[4][16] = {
     {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13,
      0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B},
     {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E,
      0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27},
     {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64,
      0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73},
     {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8,
      0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57}
   };
   static const byte test_decrypt[4][16] = {
     {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
     {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
      0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
     {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
     {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
      0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
   };
 
   /* Start the timer ticking. */
   timer = clock ();
 
   /* Encryption test.  buffer[0] and buffer[1] serve as 128-bit keys,
    * buffer[2] together with buffer[3] as one 256-bit key (hence the
    * doubled sizeof), so every buffer feeds back into the others. */
   for (i = 0; i < 125; i++)
     {
       twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[2], buffer[2]);
       twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[3], buffer[3]);
       twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
       for (j = 0; j < 1000; j++) {
         twofish_encrypt (&ctx, buffer[0], buffer[0]);
         twofish_encrypt (&ctx, buffer[1], buffer[1]);
       }
     }
   encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ?
     "encryption failure!\n" : "encryption OK!\n";
 
   /* Decryption test: the same steps in reverse order, which should
    * bring the buffers back to their initial contents. */
   for (i = 0; i < 125; i++)
     {
       twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
       for (j = 0; j < 1000; j++) {
         twofish_decrypt (&ctx, buffer[0], buffer[0]);
         twofish_decrypt (&ctx, buffer[1], buffer[1]);
       }
       twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[3], buffer[3]);
       twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[2], buffer[2]);
     }
 
   /* Stop the timer, and print results. */
   timer = clock () - timer;
   fputs (encrypt_msg, stdout);
   fputs (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ?
          "decryption failure!\n" : "decryption OK!\n", stdout);
   printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC);
 
   return 0;
 }
 
 #endif /* TEST */
 
 
 
 /* Cipher specification for the algorithm id GCRY_CIPHER_TWOFISH, named
  * "TWOFISH".  It uses TWOFISH_context as its context and the single-block
  * twofish_setkey / twofish_encrypt / twofish_decrypt functions.
  * NOTE(review): the initializer is positional; 16 and 256 are presumably
  * the block size in bytes and the key length in bits, and the two NULLs
  * optional tables -- confirm against the gcry_cipher_spec_t definition. */
 gcry_cipher_spec_t _gcry_cipher_spec_twofish =
   {
     GCRY_CIPHER_TWOFISH, {0, 0},
     "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context),
     twofish_setkey, twofish_encrypt, twofish_decrypt
   };
 
 /* Cipher specification for the algorithm id GCRY_CIPHER_TWOFISH128, named
  * "TWOFISH128".  It shares the context type and the setkey / encrypt /
  * decrypt functions with the "TWOFISH" specification and differs in the
  * id, the name and the value 128 in place of 256.
  * NOTE(review): the initializer is positional; 16 and 128 are presumably
  * the block size in bytes and the key length in bits -- confirm against
  * the gcry_cipher_spec_t definition. */
 gcry_cipher_spec_t _gcry_cipher_spec_twofish128 =
   {
     GCRY_CIPHER_TWOFISH128, {0, 0},
     "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context),
     twofish_setkey, twofish_encrypt, twofish_decrypt
   };
diff --git a/mpi/alpha/mpih-add1.S b/mpi/alpha/mpih-add1.S
index 50dbb2b9..463c5c07 100644
--- a/mpi/alpha/mpih-add1.S
+++ b/mpi/alpha/mpih-add1.S
@@ -1,124 +1,124 @@
 /* alpha  add_n -- Add two limb vectors of the same length > 0 and store
  *		   sum in a third limb vector.
  * Copyright (C) 1995, 1998, 2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	($16)
  *		   mpi_ptr_t s1_ptr,	($17)
  *		   mpi_ptr_t s2_ptr,	($18)
  *		   mpi_size_t size)	($19)
  *
  * Adds the limb vectors at S1_PTR and S2_PTR limb by limb, lowest
  * address first, and stores each sum limb at RES_PTR.  The running
  * carry (0 or 1) lives in $0 and is what is left there on return.
  *
  * The first (SIZE-1) mod 4 limbs are handled one at a time in .Loop0,
  * the following limbs four per iteration in .Loop, and the final limb
  * at .Lend.  The next pair of source limbs is always loaded while the
  * current pair is being added.
  */
 
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_add_n
 	.ent	_gcry_mpih_add_n
 _gcry_mpih_add_n:
 	.frame	$30,0,$26,0
 
 	/* Preload the first limb of each source vector. */
 	ldq	$3,0($17)
 	ldq	$4,0($18)
 
 	subq	$19,1,$19
 	and	$19,4-1,$2	# number of limbs in first loop
 	/* $0 = 0: no carry yet ($31 reads as zero on Alpha). */
 	bis	$31,$31,$0
 	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
 
 	subq	$19,$2,$19
 
 	/* One limb per iteration.  The s2 limb first absorbs the incoming
 	 * carry ($1 = carry out of that step), then the s1 limb is added
 	 * ($0 = carry out of that step); the two carries are OR-ed into $0.  */
 .Loop0: subq	$2,1,$2
 	ldq	$5,8($17)
 	addq	$4,$0,$4
 	ldq	$6,8($18)
 	cmpult	$4,$0,$1
 	addq	$3,$4,$4
 	cmpult	$4,$3,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 
 	addq	$17,8,$17
 	addq	$18,8,$18
 	/* Move the preloaded limbs into the registers the loop adds from. */
 	bis	$5,$5,$3
 	bis	$6,$6,$4
 	addq	$16,8,$16
 	bne	$2,.Loop0
 
 .L0:	beq	$19,.Lend
 
 	/* Four limbs per iteration, same add-with-carry sequence as above,
 	 * alternating between the register pairs ($3,$4) and ($5,$6).  */
 	.align	3
 .Loop:	subq	$19,4,$19
 
 	ldq	$5,8($17)
 	addq	$4,$0,$4
 	ldq	$6,8($18)
 	cmpult	$4,$0,$1
 	addq	$3,$4,$4
 	cmpult	$4,$3,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 
 	ldq	$3,16($17)
 	addq	$6,$0,$6
 	ldq	$4,16($18)
 	cmpult	$6,$0,$1
 	addq	$5,$6,$6
 	cmpult	$6,$5,$0
 	stq	$6,8($16)
 	or	$0,$1,$0
 
 	ldq	$5,24($17)
 	addq	$4,$0,$4
 	ldq	$6,24($18)
 	cmpult	$4,$0,$1
 	addq	$3,$4,$4
 	cmpult	$4,$3,$0
 	stq	$4,16($16)
 	or	$0,$1,$0
 
 	ldq	$3,32($17)
 	addq	$6,$0,$6
 	ldq	$4,32($18)
 	cmpult	$6,$0,$1
 	addq	$5,$6,$6
 	cmpult	$6,$5,$0
 	stq	$6,24($16)
 	or	$0,$1,$0
 
 	addq	$17,32,$17
 	addq	$18,32,$18
 	addq	$16,32,$16
 	bne	$19,.Loop
 
 	/* Last limb: already loaded in $3/$4, nothing more to preload. */
 .Lend:	addq	$4,$0,$4
 	cmpult	$4,$0,$1
 	addq	$3,$4,$4
 	cmpult	$4,$3,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 	ret	$31,($26),1
 
 	.end	_gcry_mpih_add_n
 
diff --git a/mpi/alpha/mpih-lshift.S b/mpi/alpha/mpih-lshift.S
index ded4b15c..75ae698c 100644
--- a/mpi/alpha/mpih-lshift.S
+++ b/mpi/alpha/mpih-lshift.S
@@ -1,122 +1,122 @@
 /* alpha - left shift
  *
  *      Copyright (C) 1994, 1995, 1998, 2001,
  *                    2002  Free Software Foundation, Inc.
  *
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(r16)
  *		   mpi_ptr_t up,	(r17)
  *		   mpi_size_t usize,	(r18)
  *		   unsigned cnt)	(r19)
  *
  * Shifts the USIZE limbs at UP left by CNT bits and stores the result
  * at WP.  The limbs are processed from the highest address downwards.
  * Every stored limb is (current limb << cnt) OR-ed with the next lower
  * limb shifted right by the negated count held in $7.  The bits shifted
  * out of the highest limb are computed into $0 as the function result.
  *
  * The first (USIZE-1) mod 4 limbs are handled one at a time in .Loop0,
  * the following limbs four per iteration in .Loop, and the lowest limb
  * at .Lend.
  *
  * This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
  * it would take 4 cycles/limb.  It should be possible to get down to 3
  * cycles/limb since both ldq and stq can be paired with the other used
  * instructions.  But there are many restrictions in the 21064 pipeline that
  * makes it hard, if not impossible, to get down to 3 cycles/limb:
  *
  * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
  * 2. Only aligned instruction pairs can be paired.
  * 3. The store buffer or silo might not be able to deal with the bandwidth.
  */
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_lshift
 	.ent	_gcry_mpih_lshift
 _gcry_mpih_lshift:
 	.frame	$30,0,$26,0
 
 	s8addq	$18,$17,$17	# make r17 point at end of s1
 	ldq	$4,-8($17)	# load first limb
 	subq	$17,8,$17
 	/* $7 = -cnt, the count for the complementary right shifts.
 	 * NOTE(review): this relies on the shift instructions using only
 	 * the low 6 bits of the count (i.e. 64-cnt) -- confirm.  */
 	subq	$31,$19,$7
 	s8addq	$18,$16,$16	# make r16 point at end of RES
 	subq	$18,1,$18
 	and	$18,4-1,$20	# number of limbs in first loop
 	srl	$4,$7,$0	# compute function result
 
 	beq	$20,.L0
 	subq	$18,$20,$18
 
 	/* One limb per iteration: load the next lower limb into $3, combine
 	 * it with the current limb in $4 and store the result, then make
 	 * the loaded limb the current one.  */
 	.align	3
 .Loop0:
 	ldq	$3,-8($17)
 	subq	$16,8,$16
 	subq	$17,8,$17
 	subq	$20,1,$20
 	sll	$4,$19,$5
 	srl	$3,$7,$6
 	bis	$3,$3,$4
 	bis	$5,$6,$8
 	stq	$8,0($16)
 	bne	$20,.Loop0
 
 .L0:	beq	$18,.Lend
 
 	/* Four limbs per iteration; $3 and $4 alternate as the current
 	 * limb, results are stored at offsets 24, 16, 8 and 0 from the
 	 * already decremented result pointer.  */
 	.align	3
 .Loop:	ldq	$3,-8($17)
 	subq	$16,32,$16
 	subq	$18,4,$18
 	sll	$4,$19,$5
 	srl	$3,$7,$6
 
 	ldq	$4,-16($17)
 	sll	$3,$19,$1
 	bis	$5,$6,$8
 	stq	$8,24($16)
 	srl	$4,$7,$2
 
 	ldq	$3,-24($17)
 	sll	$4,$19,$5
 	bis	$1,$2,$8
 	stq	$8,16($16)
 	srl	$3,$7,$6
 
 	ldq	$4,-32($17)
 	sll	$3,$19,$1
 	bis	$5,$6,$8
 	stq	$8,8($16)
 	srl	$4,$7,$2
 
 	subq	$17,32,$17
 	bis	$1,$2,$8
 	stq	$8,0($16)
 
 	bgt	$18,.Loop
 
 	/* Lowest limb: only the left shift, nothing is shifted in below. */
 .Lend:	sll	$4,$19,$8
 	stq	$8,-8($16)
 	ret	$31,($26),1
 	.end	_gcry_mpih_lshift
 
 
diff --git a/mpi/alpha/mpih-mul1.S b/mpi/alpha/mpih-mul1.S
index cd91b104..030a288d 100644
--- a/mpi/alpha/mpih-mul1.S
+++ b/mpi/alpha/mpih-mul1.S
@@ -1,90 +1,90 @@
 /* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store
  *			      the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(r16)
  *		  mpi_ptr_t s1_ptr,	(r17)
  *		  mpi_size_t s1_size,	(r18)
  *		  mpi_limb_t s2_limb)	(r19)
  *
  * Multiplies each of the S1_SIZE limbs at S1_PTR by S2_LIMB.  The low
  * half of each product plus the carry limb from the previous step is
  * stored at RES_PTR; the high half (plus the carry of that addition)
  * becomes the next carry limb.  The final carry limb is left in $0 on
  * return.
  *
  * Sizes 1 and 2 leave early through Lend1 and Lend2; larger sizes run
  * Loop once per limb from the second up to the last but one, and Lend2
  * then finishes the last limb.
  *
  * This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
  *
  * To improve performance for long multiplications, we would use
  * 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
  * these instructions without slowing down the general code: 1. We can
  * only have two prefetches in operation at any time in the Alpha
  * architecture.  2. There will seldom be any special alignment
  * between RES_PTR and S1_PTR.	Maybe we can simply divide the current
  * loop into an inner and outer loop, having the inner loop handle
  * exactly one prefetch block?
  */
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_mul_1
 	.ent	_gcry_mpih_mul_1 2
 _gcry_mpih_mul_1:
 	.frame	$30,0,$26
 
 	ldq	$2,0($17)	# $2 = s1_limb
 	subq	$18,1,$18	# size--
 	mulq	$2,$19,$3	# $3 = prod_low
 	bic	$31,$31,$4	# clear cy_limb
 	umulh	$2,$19,$0	# $0 = prod_high
 	beq	$18,Lend1	# jump if size was == 1
 	ldq	$2,8($17)	# $2 = s1_limb
 	subq	$18,1,$18	# size--
 	stq	$3,0($16)
 	beq	$18,Lend2	# jump if size was == 2
 
 	/* Main loop.  s1_ptr and res_ptr lag one limb behind the limb being
 	 * processed, hence the 16($17) load and the 8($16) store.  */
 	.align	3
 Loop:	mulq	$2,$19,$3	# $3 = prod_low
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	subq	$18,1,$18	# size--
 	umulh	$2,$19,$4	# $4 = cy_limb
 	ldq	$2,16($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	stq	$3,8($16)
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	addq	$16,8,$16	# res_ptr++
 	bne	$18,Loop
 
 	/* Last limb: same step as in the loop, without a further load. */
 Lend2:	mulq	$2,$19,$3	# $3 = prod_low
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	umulh	$2,$19,$4	# $4 = cy_limb
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	stq	$3,8($16)
 	addq	$4,$0,$0	# cy_limb = prod_high + cy
 	ret	$31,($26),1
 	/* Size 1: store the low product half; $0 still holds prod_high. */
 Lend1:	stq	$3,0($16)
 	ret	$31,($26),1
 
 	.end	_gcry_mpih_mul_1
 
 
diff --git a/mpi/alpha/mpih-mul2.S b/mpi/alpha/mpih-mul2.S
index 5eb6b98b..566642ee 100644
--- a/mpi/alpha/mpih-mul2.S
+++ b/mpi/alpha/mpih-mul2.S
@@ -1,97 +1,97 @@
 /* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add
  *			   the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (r16)
  *		     mpi_ptr_t s1_ptr,	     (r17)
  *		     mpi_size_t s1_size,     (r18)
  *		     mpi_limb_t s2_limb)     (r19)
  *
  * Multiplies each of the S1_SIZE limbs at S1_PTR by S2_LIMB and adds
  * the product to the limb already stored at RES_PTR, writing the sum
  * back in place.  The carry limb (high product half plus the carries
  * of the two additions) is propagated to the next limb; the final
  * carry limb is left in $0 on return.
  *
  * Sizes 1 and 2 leave early through .Lend1 and .Lend2; larger sizes
  * run .Loop once per limb from the second up to the last but one, and
  * .Lend2 then finishes the last limb.
  *
  * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
  */
 
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_addmul_1
 	.ent	_gcry_mpih_addmul_1 2
 _gcry_mpih_addmul_1:
 	.frame	$30,0,$26
 
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	subq	$18,1,$18	# size--
 	mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	umulh	$2,$19,$0	# $0 = prod_high
 	beq	$18,.Lend1	# jump if size was == 1
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	subq	$18,1,$18	# size--
 	/* First limb: *res_ptr + prod_low, $4 = carry of that addition. */
 	addq	$5,$3,$3
 	cmpult	$3,$5,$4
 	stq	$3,0($16)
 	addq	$16,8,$16	# res_ptr++
 	beq	$18,.Lend2	# jump if size was == 2
 
 	.align	3
 .Loop:	mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	subq	$18,1,$18	# size--
 	umulh	$2,$19,$4	# $4 = cy_limb
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	/* Add the old result limb; $5 becomes the carry of that addition. */
 	addq	$5,$3,$3
 	cmpult	$3,$5,$5
 	stq	$3,0($16)
 	addq	$16,8,$16	# res_ptr++
 	addq	$5,$0,$0	# combine carries
 	bne	$18,.Loop
 
 	/* Last limb: same step as in the loop, without a further load. */
 .Lend2: mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	umulh	$2,$19,$4	# $4 = cy_limb
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	addq	$5,$3,$3
 	cmpult	$3,$5,$5
 	stq	$3,0($16)
 	addq	$5,$0,$0	# combine carries
 	addq	$4,$0,$0	# cy_limb = prod_high + cy
 	ret	$31,($26),1
 	/* Size 1: add prod_low to *res_ptr, fold the carry into prod_high. */
 .Lend1: addq	$5,$3,$3
 	cmpult	$3,$5,$5
 	stq	$3,0($16)
 	addq	$0,$5,$0
 	ret	$31,($26),1
 
 	.end	_gcry_mpih_addmul_1
 
diff --git a/mpi/alpha/mpih-mul3.S b/mpi/alpha/mpih-mul3.S
index 7d5d2afe..520d1c62 100644
--- a/mpi/alpha/mpih-mul3.S
+++ b/mpi/alpha/mpih-mul3.S
@@ -1,95 +1,95 @@
 /* Alpha 21064	submul_1 -- Multiply a limb vector with a limb and
  *			    subtract the result from a second limb vector.
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (r16   )
  *		     mpi_ptr_t s1_ptr,	     (r17   )
  *		     mpi_size_t s1_size,     (r18   )
  *		     mpi_limb_t s2_limb)     (r19   )
  *
  * Multiplies each of the S1_SIZE limbs at S1_PTR by S2_LIMB and
  * subtracts the product from the limb already stored at RES_PTR,
  * writing the difference back in place.  The carry limb (high product
  * half plus the carry of the addition and the borrow of the
  * subtraction) is propagated to the next limb; the final carry limb is
  * left in $0 on return.
  *
  * Sizes 1 and 2 leave early through .Lend1 and .Lend2; larger sizes
  * run .Loop once per limb from the second up to the last but one, and
  * .Lend2 then finishes the last limb.
  *
  * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
  */
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_submul_1
 	.ent	_gcry_mpih_submul_1 2
 _gcry_mpih_submul_1:
 	.frame	$30,0,$26
 
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	subq	$18,1,$18	# size--
 	mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	umulh	$2,$19,$0	# $0 = prod_high
 	beq	$18,.Lend1	# jump if size was == 1
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	subq	$18,1,$18	# size--
 	/* First limb: *res_ptr - prod_low, $4 = borrow of that subtraction. */
 	subq	$5,$3,$3
 	cmpult	$5,$3,$4
 	stq	$3,0($16)
 	addq	$16,8,$16	# res_ptr++
 	beq	$18,.Lend2	# jump if size was == 2
 
 	.align	3
 .Loop:	mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	subq	$18,1,$18	# size--
 	umulh	$2,$19,$4	# $4 = cy_limb
 	ldq	$2,0($17)	# $2 = s1_limb
 	addq	$17,8,$17	# s1_ptr++
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	/* Subtract from the old result limb; $5 becomes the borrow. */
 	subq	$5,$3,$3
 	cmpult	$5,$3,$5
 	stq	$3,0($16)
 	addq	$16,8,$16	# res_ptr++
 	addq	$5,$0,$0	# combine carries
 	bne	$18,.Loop
 
 	/* Last limb: same step as in the loop, without a further load. */
 .Lend2: mulq	$2,$19,$3	# $3 = prod_low
 	ldq	$5,0($16)	# $5 = *res_ptr
 	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
 	umulh	$2,$19,$4	# $4 = cy_limb
 	addq	$3,$0,$3	# $3 = cy_limb + prod_low
 	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
 	subq	$5,$3,$3
 	cmpult	$5,$3,$5
 	stq	$3,0($16)
 	addq	$5,$0,$0	# combine carries
 	addq	$4,$0,$0	# cy_limb = prod_high + cy
 	ret	$31,($26),1
 	/* Size 1: subtract prod_low from *res_ptr, fold the borrow into
 	 * prod_high.  */
 .Lend1: subq	$5,$3,$3
 	cmpult	$5,$3,$5
 	stq	$3,0($16)
 	addq	$0,$5,$0
 	ret	$31,($26),1
 
 	.end	_gcry_mpih_submul_1
 
diff --git a/mpi/alpha/mpih-rshift.S b/mpi/alpha/mpih-rshift.S
index f0c98143..85e0af53 100644
--- a/mpi/alpha/mpih-rshift.S
+++ b/mpi/alpha/mpih-rshift.S
@@ -1,118 +1,118 @@
 /* alpha    rshift
  *      Copyright (C) 1994, 1995, 1998, 1999,
  *                    2000, 2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	(r16)
  *		   mpi_ptr_t up,	(r17)
  *		   mpi_size_t usize,	(r18)
  *		   unsigned cnt)	(r19)
  *
  * Shifts the USIZE limbs at UP right by CNT bits and stores the result
  * at WP.  The limbs are processed from the lowest address upwards.
  * Every stored limb is (current limb >> cnt) OR-ed with the next higher
  * limb shifted left by the negated count held in $7.  The bits shifted
  * out of the lowest limb are computed into $0 as the function result.
  *
  * The first (USIZE-1) mod 4 limbs are handled one at a time in .Roop0,
  * the following limbs four per iteration in .Roop, and the highest limb
  * at .Rend.
  *
  * This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
  * it would take 4 cycles/limb.  It should be possible to get down to 3
  * cycles/limb since both ldq and stq can be paired with the other used
  * instructions.  But there are many restrictions in the 21064 pipeline that
  * makes it hard, if not impossible, to get down to 3 cycles/limb:
  *
  * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
  * 2. Only aligned instruction pairs can be paired.
  * 3. The store buffer or silo might not be able to deal with the bandwidth.
  */
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_rshift
 	.ent	_gcry_mpih_rshift
 _gcry_mpih_rshift:
 	.frame	$30,0,$26,0
 
 	ldq	$4,0($17)	# load first limb
 	addq	$17,8,$17
 	/* $7 = -cnt, the count for the complementary left shifts.
 	 * NOTE(review): this relies on the shift instructions using only
 	 * the low 6 bits of the count (i.e. 64-cnt) -- confirm.  */
 	subq	$31,$19,$7
 	subq	$18,1,$18
 	and	$18,4-1,$20	# number of limbs in first loop
 	sll	$4,$7,$0	# compute function result
 
 	beq	$20,.R0
 	subq	$18,$20,$18
 
 	/* One limb per iteration: load the next higher limb into $3, combine
 	 * it with the current limb in $4 and store the result, then make
 	 * the loaded limb the current one.  */
 	.align	3
 .Roop0:
 	ldq	$3,0($17)
 	addq	$16,8,$16
 	addq	$17,8,$17
 	subq	$20,1,$20
 	srl	$4,$19,$5
 	sll	$3,$7,$6
 	bis	$3,$3,$4
 	bis	$5,$6,$8
 	stq	$8,-8($16)
 	bne	$20,.Roop0
 
 .R0:	beq	$18,.Rend
 
 	/* Four limbs per iteration; $3 and $4 alternate as the current
 	 * limb, results are stored at offsets -32, -24, -16 and -8 from the
 	 * already advanced result pointer.  */
 	.align	3
 .Roop:	ldq	$3,0($17)
 	addq	$16,32,$16
 	subq	$18,4,$18
 	srl	$4,$19,$5
 	sll	$3,$7,$6
 
 	ldq	$4,8($17)
 	srl	$3,$19,$1
 	bis	$5,$6,$8
 	stq	$8,-32($16)
 	sll	$4,$7,$2
 
 	ldq	$3,16($17)
 	srl	$4,$19,$5
 	bis	$1,$2,$8
 	stq	$8,-24($16)
 	sll	$3,$7,$6
 
 	ldq	$4,24($17)
 	srl	$3,$19,$1
 	bis	$5,$6,$8
 	stq	$8,-16($16)
 	sll	$4,$7,$2
 
 	addq	$17,32,$17
 	bis	$1,$2,$8
 	stq	$8,-8($16)
 
 	bgt	$18,.Roop
 
 	/* Highest limb: only the right shift, nothing is shifted in above. */
 .Rend:	srl	$4,$19,$8
 	stq	$8,0($16)
 	ret	$31,($26),1
 	.end	_gcry_mpih_rshift
 
diff --git a/mpi/alpha/mpih-sub1.S b/mpi/alpha/mpih-sub1.S
index 9a644468..6896b574 100644
--- a/mpi/alpha/mpih-sub1.S
+++ b/mpi/alpha/mpih-sub1.S
@@ -1,124 +1,124 @@
 /* Alpha  sub_n -- Subtract two limb vectors of the same length > 0 and
  *		  store difference in a third limb vector.
  *      Copyright (C) 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(r16)
  *		   mpi_ptr_t s1_ptr,	(r17)
  *		   mpi_ptr_t s2_ptr,	(r18)
  *		   mpi_size_t size)	(r19)
  *
  * Subtracts the limb vector at S2_PTR from the one at S1_PTR limb by
  * limb, lowest address first, and stores each difference limb at
  * RES_PTR.  The running borrow (0 or 1) lives in $0 and is what is left
  * there on return.
  *
  * The first (SIZE-1) mod 4 limbs are handled one at a time in .Loop0,
  * the following limbs four per iteration in .Loop, and the final limb
  * at .Lend.  The next pair of source limbs is always loaded while the
  * current pair is being subtracted.
  */
 
 	.set	noreorder
 	.set	noat
 .text
 	.align	3
 	.globl	_gcry_mpih_sub_n
 	.ent	_gcry_mpih_sub_n
 _gcry_mpih_sub_n:
 	.frame	$30,0,$26,0
 
 	/* Preload the first limb of each source vector. */
 	ldq	$3,0($17)
 	ldq	$4,0($18)
 
 	subq	$19,1,$19
 	and	$19,4-1,$2	# number of limbs in first loop
 	/* $0 = 0: no borrow yet ($31 reads as zero on Alpha). */
 	bis	$31,$31,$0
 	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
 
 	subq	$19,$2,$19
 
 	/* One limb per iteration.  The s2 limb first absorbs the incoming
 	 * borrow ($1 = carry out of that addition), then it is subtracted
 	 * from the s1 limb ($0 = 1 if the s1 limb is below the difference,
 	 * i.e. the subtraction wrapped); both flags are OR-ed into $0.  */
 .Loop0: subq	$2,1,$2
 	ldq	$5,8($17)
 	addq	$4,$0,$4
 	ldq	$6,8($18)
 	cmpult	$4,$0,$1
 	subq	$3,$4,$4
 	cmpult	$3,$4,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 
 	addq	$17,8,$17
 	addq	$18,8,$18
 	/* Move the preloaded limbs into the registers the loop works on. */
 	bis	$5,$5,$3
 	bis	$6,$6,$4
 	addq	$16,8,$16
 	bne	$2,.Loop0
 
 .L0:	beq	$19,.Lend
 
 	/* Four limbs per iteration, same subtract-with-borrow sequence as
 	 * above, alternating between the register pairs ($3,$4) and
 	 * ($5,$6).  */
 	.align	3
 .Loop:	subq	$19,4,$19
 
 	ldq	$5,8($17)
 	addq	$4,$0,$4
 	ldq	$6,8($18)
 	cmpult	$4,$0,$1
 	subq	$3,$4,$4
 	cmpult	$3,$4,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 
 	ldq	$3,16($17)
 	addq	$6,$0,$6
 	ldq	$4,16($18)
 	cmpult	$6,$0,$1
 	subq	$5,$6,$6
 	cmpult	$5,$6,$0
 	stq	$6,8($16)
 	or	$0,$1,$0
 
 	ldq	$5,24($17)
 	addq	$4,$0,$4
 	ldq	$6,24($18)
 	cmpult	$4,$0,$1
 	subq	$3,$4,$4
 	cmpult	$3,$4,$0
 	stq	$4,16($16)
 	or	$0,$1,$0
 
 	ldq	$3,32($17)
 	addq	$6,$0,$6
 	ldq	$4,32($18)
 	cmpult	$6,$0,$1
 	subq	$5,$6,$6
 	cmpult	$5,$6,$0
 	stq	$6,24($16)
 	or	$0,$1,$0
 
 	addq	$17,32,$17
 	addq	$18,32,$18
 	addq	$16,32,$16
 	bne	$19,.Loop
 
 	/* Last limb: already loaded in $3/$4, nothing more to preload. */
 .Lend:	addq	$4,$0,$4
 	cmpult	$4,$0,$1
 	subq	$3,$4,$4
 	cmpult	$3,$4,$0
 	stq	$4,0($16)
 	or	$0,$1,$0
 	ret	$31,($26),1
 
 	.end	_gcry_mpih_sub_n
 
 
diff --git a/mpi/alpha/udiv-qrnnd.S b/mpi/alpha/udiv-qrnnd.S
index dd0c52d7..eb1602a9 100644
--- a/mpi/alpha/udiv-qrnnd.S
+++ b/mpi/alpha/udiv-qrnnd.S
@@ -1,159 +1,159 @@
 /* Alpha 21064 __udiv_qrnnd
  *
  *      Copyright (C) 1992, 1994, 1995, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 	.set noreorder
 	.set noat
 .text
 	.align	3
 	.globl	__udiv_qrnnd
 	.ent	__udiv_qrnnd
 /* Divide the two-limb value n1:n0 ($17:$18) by d ($19) with a
  * shift-and-subtract loop that produces one quotient bit per step
  * (16 iterations of 4 unrolled steps).  The remainder is stored at
  * *rem_ptr ($16) and the quotient is left in $0.  A divisor with its
  * top bit set takes the .Largedivisor path, which runs the same loop
  * on the dividend shifted right by one bit and on the rounded-up half
  * of d, and then corrects quotient and remainder.  */
 __udiv_qrnnd:
 	.frame $30,0,$26,0
 	.prologue 0
 #define cnt	$2
 #define tmp	$3
 #define rem_ptr $16
 #define n1	$17
 #define n0	$18
 #define d	$19
 #define qb	$20
 
 	ldiq	cnt,16
 	blt	d,.Largedivisor		/* d negative as signed: top bit set */
 
 	/* One step: shift n1:n0 left by one bit, subtract d from n1 if it
 	 * fits, and record the outcome as the new low bit of n0.  */
 .Loop1: cmplt	n0,0,tmp		/* tmp = top bit of n0 */
 	addq	n1,n1,n1		/* n1 <<= 1 */
 	bis	n1,tmp,n1		/* ... shifting in the top bit of n0 */
 	addq	n0,n0,n0		/* n0 <<= 1 */
 	cmpule	d,n1,qb			/* qb = 1 if d <= n1 */
 	subq	n1,d,tmp
 	cmovne	qb,tmp,n1		/* if qb: n1 -= d */
 	bis	n0,qb,n0		/* quotient bit goes into bit 0 of n0 */
 	cmplt	n0,0,tmp		/* step 2 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	d,n1,qb
 	subq	n1,d,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	cmplt	n0,0,tmp		/* step 3 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	d,n1,qb
 	subq	n1,d,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	cmplt	n0,0,tmp		/* step 4 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	d,n1,qb
 	subq	n1,d,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	subq	cnt,1,cnt
 	bgt	cnt,.Loop1
 	stq	n1,0(rem_ptr)		/* remainder */
 	bis	$31,n0,$0		/* quotient */
 	ret	$31,($26),1
 
 .Largedivisor:
 	and	n0,1,$4			/* $4 = low bit of n0, re-added after the loop */
 
 	srl	n0,1,n0			/* n1:n0 >>= 1 */
 	sll	n1,63,tmp
 	or	tmp,n0,n0
 	srl	n1,1,n1
 
 	and	d,1,$6			/* $6 = low bit of d */
 	srl	d,1,$5
 	addq	$5,$6,$5		/* $5 = (d >> 1) + (d & 1) */
 
 	/* Same step as in .Loop1, dividing by $5 instead of d.  */
 .Loop2: cmplt	n0,0,tmp
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	$5,n1,qb
 	subq	n1,$5,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	cmplt	n0,0,tmp		/* step 2 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	$5,n1,qb
 	subq	n1,$5,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	cmplt	n0,0,tmp		/* step 3 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	$5,n1,qb
 	subq	n1,$5,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	cmplt	n0,0,tmp		/* step 4 */
 	addq	n1,n1,n1
 	bis	n1,tmp,n1
 	addq	n0,n0,n0
 	cmpule	$5,n1,qb
 	subq	n1,$5,tmp
 	cmovne	qb,tmp,n1
 	bis	n0,qb,n0
 	subq	cnt,1,cnt
 	bgt	cnt,.Loop2
 
 	addq	n1,n1,n1		/* n1 = 2*n1 ... */
 	addq	$4,n1,n1		/* ... + the low bit of n0 saved in $4 */
 	bne	$6,.LOdd		/* odd d: result needs correcting */
 	stq	n1,0(rem_ptr)
 	bis	$31,n0,$0
 	ret	$31,($26),1
 
 .LOdd:
 	/* q' in n0. r' in n1 */
 	addq	n1,n0,n1
 	cmpult	n1,n0,tmp	# tmp := carry from addq
 	beq	tmp,.LLp6
 	addq	n0,1,n0			/* carry out: bump quotient, take d off remainder */
 	subq	n1,d,n1
 .LLp6:	cmpult	n1,d,tmp		/* tmp = 1 if n1 < d */
 	bne	tmp,.LLp7
 	addq	n0,1,n0			/* remainder still >= d: adjust once more */
 	subq	n1,d,n1
 .LLp7:
 	stq	n1,0(rem_ptr)
 	bis	$31,n0,$0
 	ret	$31,($26),1
 
 	.end	__udiv_qrnnd
diff --git a/mpi/amd64/mpih-add1.S b/mpi/amd64/mpih-add1.S
index f2e86237..b4f7489c 100644
--- a/mpi/amd64/mpih-add1.S
+++ b/mpi/amd64/mpih-add1.S
@@ -1,119 +1,119 @@
 /* AMD64 (x86_64) add_n -- Add two limb vectors of the same length > 0 and store
  *		   sum in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *      Copyright (C) 2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	rdi
  *		   mpi_ptr_t s1_ptr,		rsi
  *		   mpi_ptr_t s2_ptr,		rdx
  *		   mpi_size_t size)		rcx
  */
 	/* Add the limb vectors at s1_ptr (%rsi) and s2_ptr (%rdx), store the
 	 * size (%rcx) result limbs at res_ptr (%rdi) and return the final
 	 * carry (0 or 1) in %eax.  The size mod 4 leading limbs are handled
 	 * by one of the .Lprehandle blocks; the rest go through .Loop four
 	 * limbs at a time.  Between the add/adc instructions the counter is
 	 * only touched with leaq (changes no flags) and decq (leaves CF
 	 * alone), so the carry chain is never broken.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	.globl C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
 	FUNC_ENTRY()
 	movl	%ecx, %r9d
 	andl	$3, %r9d		/* %r9d = size mod 4 */
 	je	.Lprehandle0
 	cmpl	$2, %r9d
 	jb	.Lprehandle1
 	je	.Lprehandle2
 					/* size mod 4 == 3: fall through to .Lprehandle3 */
 
 #define FIRST_ADD() \
 	movq	(%rsi), %rax; \
 	addq	(%rdx), %rax; \
 	movq	%rax, (%rdi)
 
 #define NEXT_ADD(offset) \
 	movq	offset(%rsi), %rax; \
 	adcq	offset(%rdx), %rax; \
 	movq	%rax, offset(%rdi)
 
 .Lprehandle3:
 	leaq	-2(%rcx), %rcx		/* together with the decq below: size -= 3 */
 	FIRST_ADD();
 	NEXT_ADD(8);
 	NEXT_ADD(16);
 	decq	%rcx
 	je	.Lend			/* nothing left for the main loop */
 	leaq	24(%rsi), %rsi
 	leaq	24(%rdx), %rdx
 	leaq	24(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle2:
 	leaq	-1(%rcx), %rcx		/* together with the decq below: size -= 2 */
 	FIRST_ADD();
 	NEXT_ADD(8);
 	decq	%rcx
 	je	.Lend
 	leaq	16(%rsi), %rsi
 	leaq	16(%rdx), %rdx
 	leaq	16(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle1:
 	FIRST_ADD();
 	decq	%rcx			/* size -= 1 */
 	je	.Lend
 	leaq	8(%rsi), %rsi
 	leaq	8(%rdx), %rdx
 	leaq	8(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle0:
 	clc				/* clear cy */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	leaq	-3(%rcx), %rcx
 	NEXT_ADD(0);
 	NEXT_ADD(8);
 	NEXT_ADD(16);
 	NEXT_ADD(24);
 	leaq	32(%rsi), %rsi
 	leaq	32(%rdx), %rdx
 	leaq	32(%rdi), %rdi
 	decq	%rcx			/* with the leaq above: size -= 4; sets ZF, keeps CF */
 	jne	.Loop
 
 	ALIGN(2)
 .Lend:
 	movl	$0, %eax		/* zero %rax */
 	adcl	%eax, %eax		/* %eax = 0 + 0 + CF: the carry out */
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-lshift.S b/mpi/amd64/mpih-lshift.S
index 3fa6e4fd..e62946ae 100644
--- a/mpi/amd64/mpih-lshift.S
+++ b/mpi/amd64/mpih-lshift.S
@@ -1,79 +1,79 @@
 /* AMD64 (x86_64) lshift -- Left shift a limb vector and store
  *		  result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	rdi
  *		   mpi_ptr_t up,	rsi
  *		   mpi_size_t usize,	rdx
  *		   unsigned cnt)	rcx
  */
 
 	/* Shift the usize (%rdx) limbs at up (%rsi) left by cnt (%rcx) bits,
 	 * store the result at wp (%rdi) and return in %rax the bits shifted
 	 * out of the most significant limb.  Limbs are processed from the
 	 * most significant end downwards, two per loop iteration.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	.globl C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
 	FUNC_ENTRY()
 	/* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. */
 	movq	-8(%rsi,%rdx,8), %xmm4	/* most significant limb */
 	movd	%ecx, %xmm1		/* %xmm1 = cnt */
 	movl	$64, %eax
 	subl	%ecx, %eax
 	movd	%eax, %xmm0		/* %xmm0 = 64 - cnt */
 	movdqa	%xmm4, %xmm3
 	psrlq	%xmm0, %xmm4
 	movq	%xmm4, %rax		/* return value: top limb >> (64 - cnt) */
 	subq	$2, %rdx		/* %rdx = index of the next lower limb - 1 */
 	jl	.Lendo			/* usize == 1: only the final store remains */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	movq	(%rsi,%rdx,8), %xmm5
 	movdqa	%xmm5, %xmm2
 	psrlq	%xmm0, %xmm5		/* high bits of the lower limb ... */
 	psllq	%xmm1, %xmm3
 	por	%xmm5, %xmm3		/* ... fill the low bits of the upper one */
 	movq	%xmm3, 8(%rdi,%rdx,8)
 	je	.Lende			/* flags still from the last subq: %rdx == 0 */
 	movq	-8(%rsi,%rdx,8), %xmm4
 	movdqa	%xmm4, %xmm3
 	psrlq	%xmm0, %xmm4
 	psllq	%xmm1, %xmm2
 	por	%xmm4, %xmm2
 	movq	%xmm2, (%rdi,%rdx,8)
 	subq	$2, %rdx
 	jge	.Loop
 
 .Lendo:	movdqa	%xmm3, %xmm2		/* least significant limb is in %xmm3 */
 .Lende:	psllq	%xmm1, %xmm2		/* lowest result limb: zeros shifted in */
 	movq	%xmm2, (%rdi)
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-mul1.S b/mpi/amd64/mpih-mul1.S
index de5fa3ce..7bd9ff29 100644
--- a/mpi/amd64/mpih-mul1.S
+++ b/mpi/amd64/mpih-mul1.S
@@ -1,66 +1,66 @@
 /* AMD64 mul_1 -- Multiply a limb vector with a limb and store
  *			 the result in a second limb vector.
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(rdi)
  *		  mpi_ptr_t s1_ptr,	(rsi)
  *		  mpi_size_t s1_size,	(rdx)
  *		  mpi_limb_t s2_limb)	(rcx)
  */
 
 
 	/* Multiply the s1_size (%rdx) limbs at s1_ptr (%rsi) by s2_limb
 	 * (%rcx), store the low limbs of the running product at res_ptr
 	 * (%rdi) and return the final carry limb in %rax.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 
 	FUNC_ENTRY()
 	movq	%rdx, %r11		/* mulq overwrites %rdx, keep the size here */
 	leaq	(%rsi,%rdx,8), %rsi	/* point both vectors past their end ... */
 	leaq	(%rdi,%rdx,8), %rdi
 	negq	%r11			/* ... and index with -size .. -1 */
 	xorl	%r8d, %r8d		/* %r8 = carry limb, initially 0 */
 
 	ALIGN(4)
 .Loop:	movq	(%rsi,%r11,8), %rax
 	mulq	%rcx			/* %rdx:%rax = s1 limb * s2_limb */
 	addq	%r8, %rax		/* add carry limb from the previous step */
 	movl	$0, %r8d		/* clear %r8 without touching CF */
 	adcq	%rdx, %r8		/* new carry limb = high half + CF */
 	movq	%rax, (%rdi,%r11,8)
 	incq	%r11
 	jne	.Loop			/* until the index reaches 0 */
 
 	movq	%r8, %rax
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-mul2.S b/mpi/amd64/mpih-mul2.S
index 0b3025d6..abd39230 100644
--- a/mpi/amd64/mpih-mul2.S
+++ b/mpi/amd64/mpih-mul2.S
@@ -1,66 +1,66 @@
 /* AMD64 addmul_1 -- Multiply a limb vector with a limb and add
  *		      the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998,
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,   (rdi)
  *		     mpi_ptr_t s1_ptr,	     (rsi)
  *		     mpi_size_t s1_size,     (rdx)
  *		     mpi_limb_t s2_limb)     (rcx)
  */
 	/* Multiply the s1_size (%rdx) limbs at s1_ptr (%rsi) by s2_limb
 	 * (%rcx), add the product to the limbs at res_ptr (%rdi) in place
 	 * and return the final carry limb in %rax.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
 	FUNC_ENTRY()
 	movq	%rdx, %r11		/* mulq overwrites %rdx, keep the size here */
 	leaq	(%rsi,%rdx,8), %rsi	/* point both vectors past their end ... */
 	leaq	(%rdi,%rdx,8), %rdi
 	negq	%r11			/* ... and index with -size .. -1 */
 	xorl	%r8d, %r8d		/* %r8 = carry limb, initially 0 */
 	xorl	%r10d, %r10d		/* %r10 stays 0 for the whole loop */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	movq	(%rsi,%r11,8), %rax
 	mulq	%rcx			/* %rdx:%rax = s1 limb * s2_limb */
 	addq	(%rdi,%r11,8), %rax	/* + current result limb */
 	adcq	%r10, %rdx		/* fold that carry into the high half */
 	addq	%r8, %rax		/* + carry limb from the previous step */
 	movq	%r10, %r8		/* %r8 = 0; mov leaves CF untouched */
 	movq	%rax, (%rdi,%r11,8)
 	adcq	%rdx, %r8		/* new carry limb = high half + CF */
 	incq	%r11
 	jne	.Loop			/* until the index reaches 0 */
 
 	movq	%r8, %rax
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-mul3.S b/mpi/amd64/mpih-mul3.S
index 7d3486e8..d0e85ba9 100644
--- a/mpi/amd64/mpih-mul3.S
+++ b/mpi/amd64/mpih-mul3.S
@@ -1,67 +1,67 @@
 /* AMD64 submul_1 -- Multiply a limb vector with a limb and subtract
  *		      the result from a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,   (rdi)
  *		     mpi_ptr_t s1_ptr,	     (rsi)
  *		     mpi_size_t s1_size,     (rdx)
  *		     mpi_limb_t s2_limb)     (rcx)
  */
 	/* Multiply the s1_size (%rdx) limbs at s1_ptr (%rsi) by s2_limb
 	 * (%rcx), subtract the product from the limbs at res_ptr (%rdi) in
 	 * place and return the final borrow limb in %rax.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
 	FUNC_ENTRY()
 	movq	%rdx, %r11		/* mulq overwrites %rdx, keep the size here */
 	leaq	(%rsi,%r11,8), %rsi	/* point both vectors past their end ... */
 	leaq	(%rdi,%r11,8), %rdi
 	negq	%r11			/* ... and index with -size .. -1 */
 	xorl	%r8d, %r8d		/* %r8 = borrow limb, initially 0 */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	movq	(%rsi,%r11,8), %rax
 	movq	(%rdi,%r11,8), %r10	/* current result limb */
 	mulq	%rcx			/* %rdx:%rax = s1 limb * s2_limb */
 	subq	%r8, %r10		/* - borrow limb from the previous step */
 	movl	$0, %r8d		/* clear %r8 without touching CF */
 	adcl	%r8d, %r8d		/* %r8 = borrow out of that subtraction */
 	subq	%rax, %r10		/* - low half of the product */
 	adcq	%rdx, %r8		/* new borrow limb = high half + both borrows */
 	movq	%r10, (%rdi,%r11,8)
 	incq	%r11
 	jne	.Loop			/* until the index reaches 0 */
 
 	movq	%r8, %rax
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-rshift.S b/mpi/amd64/mpih-rshift.S
index 4bc5db22..e4e0c090 100644
--- a/mpi/amd64/mpih-rshift.S
+++ b/mpi/amd64/mpih-rshift.S
@@ -1,82 +1,82 @@
 /* AMD64 (x86_64) rshift -- Right shift a limb vector and store
  *		  result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	rdi
  *		   mpi_ptr_t up,	rsi
  *		   mpi_size_t usize,	rdx
  *		   unsigned cnt)	rcx
  */
 
 	/* Shift the usize (%rdx) limbs at up (%rsi) right by cnt (%rcx)
 	 * bits, store the result at wp (%rdi) and return in %rax the bits
 	 * shifted out of the least significant limb (placed at the top of
 	 * the returned limb).  Limbs are processed from the least
 	 * significant end upwards, two per loop iteration.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	.globl C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
 	FUNC_ENTRY()
 	/* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. */
 	movq	(%rsi), %xmm4		/* least significant limb */
 	movd	%ecx, %xmm1		/* %xmm1 = cnt */
 	movl	$64, %eax
 	subl	%ecx, %eax
 	movd	%eax, %xmm0		/* %xmm0 = 64 - cnt */
 	movdqa	%xmm4, %xmm3
 	psllq	%xmm0, %xmm4
 	movq	%xmm4, %rax		/* return value: low limb << (64 - cnt) */
 	leaq	(%rsi,%rdx,8), %rsi	/* point both vectors past their end ... */
 	leaq	(%rdi,%rdx,8), %rdi
 	negq	%rdx			/* ... and index with a negative offset */
 	addq	$2, %rdx
 	jg	.Lendo			/* usize == 1: only the final store remains */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	movq	-8(%rsi,%rdx,8), %xmm5
 	movdqa	%xmm5, %xmm2
 	psllq	%xmm0, %xmm5		/* low bits of the upper limb ... */
 	psrlq	%xmm1, %xmm3
 	por	%xmm5, %xmm3		/* ... fill the high bits of the lower one */
 	movq	%xmm3, -16(%rdi,%rdx,8)
 	je	.Lende			/* flags still from the last addq: %rdx == 0 */
 	movq	(%rsi,%rdx,8), %xmm4
 	movdqa	%xmm4, %xmm3
 	psllq	%xmm0, %xmm4
 	psrlq	%xmm1, %xmm2
 	por	%xmm4, %xmm2
 	movq	%xmm2, -8(%rdi,%rdx,8)
 	addq	$2, %rdx
 	jle	.Loop
 
 .Lendo:	movdqa	%xmm3, %xmm2		/* most significant limb is in %xmm3 */
 .Lende:	psrlq	%xmm1, %xmm2		/* top result limb: zeros shifted in */
 	movq	%xmm2, -8(%rdi)
 	FUNC_EXIT()
diff --git a/mpi/amd64/mpih-sub1.S b/mpi/amd64/mpih-sub1.S
index 32799c86..c2cc635b 100644
--- a/mpi/amd64/mpih-sub1.S
+++ b/mpi/amd64/mpih-sub1.S
@@ -1,119 +1,119 @@
 /* AMD64 (x86_64) sub_n -- Subtract two limb vectors of the same length > 0 and store
  *		   difference in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002, 2006 Free Software Foundation, Inc.
  *      Copyright (C) 2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	rdi
  *		   mpi_ptr_t s1_ptr,		rsi
  *		   mpi_ptr_t s2_ptr,		rdx
  *		   mpi_size_t size)		rcx
  */
 	/* Subtract the limb vector at s2_ptr (%rdx) from the one at s1_ptr
 	 * (%rsi), store the size (%rcx) result limbs at res_ptr (%rdi) and
 	 * return the final borrow (0 or 1) in %eax.  The size mod 4 leading
 	 * limbs are handled by one of the .Lprehandle blocks; the rest go
 	 * through .Loop four limbs at a time.  Between the sub/sbb
 	 * instructions the counter is only touched with leaq (changes no
 	 * flags) and decq (leaves CF alone), so the borrow chain is never
 	 * broken.
 	 * FUNC_ENTRY()/FUNC_EXIT() are macros defined outside this file.  */
 	TEXT
 	ALIGN(4)
 	.globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 	FUNC_ENTRY()
 	movl	%ecx, %r9d
 	andl	$3, %r9d		/* %r9d = size mod 4 */
 	je	.Lprehandle0
 	cmpl	$2, %r9d
 	jb	.Lprehandle1
 	je	.Lprehandle2
 					/* size mod 4 == 3: fall through to .Lprehandle3 */
 
 #define FIRST_SUB() \
 	movq	(%rsi), %rax; \
 	subq	(%rdx), %rax; \
 	movq	%rax, (%rdi)
 
 #define NEXT_SUB(offset) \
 	movq	offset(%rsi), %rax; \
 	sbbq	offset(%rdx), %rax; \
 	movq	%rax, offset(%rdi)
 
 .Lprehandle3:
 	leaq	-2(%rcx), %rcx		/* together with the decq below: size -= 3 */
 	FIRST_SUB();
 	NEXT_SUB(8);
 	NEXT_SUB(16);
 	decq	%rcx
 	je	.Lend			/* nothing left for the main loop */
 	leaq	24(%rsi), %rsi
 	leaq	24(%rdx), %rdx
 	leaq	24(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle2:
 	leaq	-1(%rcx), %rcx		/* together with the decq below: size -= 2 */
 	FIRST_SUB();
 	NEXT_SUB(8);
 	decq	%rcx
 	je	.Lend
 	leaq	16(%rsi), %rsi
 	leaq	16(%rdx), %rdx
 	leaq	16(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle1:
 	FIRST_SUB();
 	decq	%rcx			/* size -= 1 */
 	je	.Lend
 	leaq	8(%rsi), %rsi
 	leaq	8(%rdx), %rdx
 	leaq	8(%rdi), %rdi
 	jmp	.Loop
 
 	ALIGN(3)
 .Lprehandle0:
 	clc				/* clear cy */
 
 	ALIGN(4)			/* minimal alignment for claimed speed */
 .Loop:	leaq	-3(%rcx), %rcx
 	NEXT_SUB(0);
 	NEXT_SUB(8);
 	NEXT_SUB(16);
 	NEXT_SUB(24);
 	leaq	32(%rsi), %rsi
 	leaq	32(%rdx), %rdx
 	leaq	32(%rdi), %rdi
 	decq	%rcx			/* with the leaq above: size -= 4; sets ZF, keeps CF */
 	jne	.Loop
 
 	ALIGN(2)
 .Lend:
 	movl	$0, %eax		/* zero %rax */
 	adcl	%eax, %eax		/* %eax = 0 + 0 + CF: the borrow out */
 	FUNC_EXIT()
diff --git a/mpi/config.links b/mpi/config.links
index 8cd6657e..94b42e53 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -1,436 +1,436 @@
 # config.links - helper for ../configure             -*- mode: sh -*-
 # Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 # Copyright (C) 2012  g10 Code GmbH
 #
 # This file is part of Libgcrypt.
 #
 # Libgcrypt is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as
 # published by the Free Software Foundation; either version 2.1 of
 # the License, or (at your option) any later version.
 #
 # Libgcrypt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+# License along with this program; if not, see <https://www.gnu.org/licenses/>.
+# SPDX-License-Identifier: LGPL-2.1-or-later
 #
 # sourced by ../configure to get the list of files to link
 # this should set $mpi_ln_list.
 # Note: this is called from the above directory.
 #
 # Required variables:
 #  $ac_cv_sys_symbol_underscore
 #  $gcry_cv_gcc_arm_platform_as_ok
 
 # Variables handed back to configure start out empty.
 mpi_sflags=""
 mpi_extra_modules=""
 mpi_cpu_arch=""
 
 # The generated header is written into ./mpi; create it on demand.
 if test ! -d ./mpi ; then
     mkdir ./mpi
 fi
 
 # Take the module names from the #BEGIN_ASM_LIST .. #END_ASM_LIST
 # section of the Makefile so the list is maintained in one place only.
 # Entries whose third field is "O" are the optional modules; all
 # others are the standard ones.
 mpi_optional_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { if( $3 == "O" ) print $2 }' $srcdir/mpi/Makefile.am`
 mpi_standard_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { if( $3 != "O" ) print $2 }' $srcdir/mpi/Makefile.am`
 
 
 # Start a fresh asm-syntax.h with its two header lines.
 {
     echo '/* created by config.links - do not edit */'
     echo "/* Host: ${host} */"
 } >./mpi/asm-syntax.h
 
 # Pick the assembler configuration for the configured host triplet.
 # The first matching pattern wins, so the more specific patterns are
 # listed before the generic ones of the same CPU family.  An arm may
 #   - append syntax macros, a syntax header or just a remark to
 #     ./mpi/asm-syntax.h,
 #   - set path (one or more names; they match directory names below
 #     $srcdir/mpi where visible here -- the consumer of path is outside
 #     this section), left empty when no assembler modules are used,
 #   - set mpi_extra_modules, mpi_sflags and mpi_cpu_arch.
 case "${host}" in
     i[34567]86*-*-openbsd[12]*     | \
     i[34567]86*-*-openbsd3.[0123]*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
        mpi_cpu_arch="x86"
        ;;
     i[34567]86*-*-openbsd*         | \
     i[34567]86*-*-freebsd*-elf     | \
     i[34567]86*-*-freebsd[3-9]*    | \
     i[34567]86*-*-freebsd[12][0-9]*| \
     i[34567]86*-*-freebsdelf*      | \
     i[34567]86*-*-netbsd*          | \
     i[34567]86*-*-k*bsd*           | \
     pentium-*-netbsd*	           | \
     pentiumpro-*-netbsd*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h	   >>./mpi/asm-syntax.h
        path="i386"
        mpi_cpu_arch="x86"
        ;;
     i[34]86*-*-bsdi4*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h	 >>./mpi/asm-syntax.h
        path="i386"
        mpi_cpu_arch="x86"
        ;;
     i[34567]86*-*-linuxaout*  | \
     i[34567]86*-*-linuxoldld* | \
     i[34567]86*-*-*bsd*)
 	echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
 	echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i386"
         mpi_cpu_arch="x86"
 	;;
     i[34567]86*-msdosdjgpp* | \
     i[34567]86*-apple-darwin*)
 	echo '#define BSD_SYNTAX'        >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	 >>./mpi/asm-syntax.h
 	path="i386"
         mpi_cpu_arch="x86"
 	;;
     i[34567]86*-*-* | \
     pentium-*-*     | \
     pentiumpro-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i386"
         mpi_cpu_arch="x86"
 	;;
     # The amd64 arms reuse the i386 syntax header and add func_abi.h.
     x86_64-apple-darwin*)
 	echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
 	path="amd64"
         mpi_cpu_arch="x86"
 	;;
     x86_64-*mingw32*)
 	echo '#define USE_MS_ABI' >>./mpi/asm-syntax.h
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
 	path="amd64"
         mpi_cpu_arch="x86"
         ;;
     x86_64-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
 	path="amd64"
         mpi_cpu_arch="x86"
 	;;
     alpha*-*-*)
 	echo '/* configured for alpha */' >>./mpi/asm-syntax.h
 	path="alpha"
 	mpi_extra_modules="udiv-qrnnd"
         mpi_cpu_arch="alpha"
 	;;
     aarch64-*-*)
 	echo '/* configured for aarch64 */' >>./mpi/asm-syntax.h
 	path="aarch64"
 	mpi_cpu_arch="aarch64"
 	;;
     arm*-*-*)
 	mpi_cpu_arch="arm"
 	# Assembler modules are only used when the configure check whose
 	# result is in gcry_cv_gcc_arm_platform_as_ok succeeded.
 	if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
 	  echo '/* configured for arm */' >>./mpi/asm-syntax.h
 	  path="arm"
 	else
 	  echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
 	  path=""
 	fi
 	;;
     hppa7000*-*-*)
 	echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
 	path="hppa1.1 hppa"
 	mpi_extra_modules="udiv-qrnnd"
 	mpi_cpu_arch="hppa"
 	;;
     hppa1.0*-*-*)
 	echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h
 	path="hppa"
 	mpi_extra_modules="udiv-qrnnd"
 	mpi_cpu_arch="hppa"
 	;;
     hppa*-*-*)	# assume pa7100
 	echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h
 	path="pa7100 hppa1.1 hppa"
 	mpi_extra_modules="udiv-qrnnd"
 	mpi_cpu_arch="hppa"
 	;;
     sparc64-*-linux-gnu)
         echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
 	path=""
         mpi_cpu_arch="sparc"
 	;;
     sparc64-sun-solaris2*)
         echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
 	path=""
         mpi_cpu_arch="sparc"
         ;;
     sparc64-*-netbsd* | sparc64-*-freebsd* | sparc64-*-openbsd*)
        # There are no sparc64 assembler modules that work on the
        # *BSDs, so use the generic C functions.
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
        mpi_cpu_arch="sparc"
        ;;
     sparc64*-*-*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
        mpi_cpu_arch="sparc"
        ;;
     sparc9*-*-*     | \
     ultrasparc*-*-* )
 	echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h
 	path="sparc32v8 sparc32"
         mpi_cpu_arch="sparc"
 	;;
     sparc8*-*-*     | \
     microsparc*-*-*)
 	echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h
 	path="sparc32v8 sparc32"
         mpi_cpu_arch="sparc"
 	;;
     supersparc*-*-*)
 	echo '/* configured for supersparc */' >>./mpi/asm-syntax.h
 	path="supersparc sparc32v8 sparc32"
 	mpi_extra_modules="udiv"
         mpi_cpu_arch="sparc"
 	;;
     sparc*-*-*)
 	echo '/* configured for sparc */' >>./mpi/asm-syntax.h
 	path="sparc32"
 	mpi_extra_modules="udiv"
         mpi_cpu_arch="sparc"
 	;;
     mips[34]*-*-* | \
     mips*-*-irix6*)
        echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h
        path="mips3"
        mpi_cpu_arch="mips"
        ;;
     mips*-*-*)
        echo '/* configured for MIPS2 */' >>./mpi/asm-syntax.h
        path="mips2"
        mpi_cpu_arch="mips"
        ;;
     s390x*-*-*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
        mpi_cpu_arch="s390x"
        ;;
 
     # Motorola 68k configurations.  Let m68k mean 68020-68040.
     # mc68000 or mc68060 configurations need to be specified explicitly
     m680[234]0*-*-linuxaout* | \
     m68k*-*-linuxaout*)
 	echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k/mc68020 m68k"
         mpi_cpu_arch="m68k"
 	;;
     m68060*-*-linuxaout*)
 	echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k"
         mpi_cpu_arch="m68k"
 	;;
     m680[234]0*-*-linux* | \
     m68k*-*-linux*)
 	# NOTE(review): unlike every other m68k arm this one does not
 	# assign path, so path keeps whatever value it had before the
 	# case statement (it is not initialized in this file's visible
 	# prologue).  Confirm whether that is intended.
 	echo '#define ELF_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
         mpi_cpu_arch="m68k"
 	;;
     m68060*-*-linux*)
 	echo '#define ELF_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k"
         mpi_cpu_arch="m68k"
 	;;
     m68k-atari-mint)
 	echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k"
         mpi_cpu_arch="m68k"
 	;;
     m68000*-*-* | \
     m68060*-*-*)
 	echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k/mc68000"
         mpi_cpu_arch="m68k"
 	;;
     m680[234]0*-*-* | \
     m68k*-*-*)
 	echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/m68k/syntax.h	    >>./mpi/asm-syntax.h
 	path="m68k/mc68020 m68k"
         mpi_cpu_arch="m68k"
 	;;
 
     powerpc-apple-darwin*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
        mpi_cpu_arch="ppc"
        ;;
 
     powerpc*-*-netbsd* | powerpc*-*-openbsd*)
        echo '/* configured {Open,Net}BSD on powerpc */' >>./mpi/asm-syntax.h
        echo '#define ELF_SYNTAX'                 >>./mpi/asm-syntax.h
        cat   $srcdir/mpi/powerpc32/syntax.h	 >>./mpi/asm-syntax.h
        mpi_sflags="-Wa,-mppc"
        path="powerpc32"
        mpi_cpu_arch="ppc"
        ;;
 
     # The remaining POWER/PowerPC arms, except powerpc*-*-linux*, add
     # nothing to asm-syntax.h; they only select assembler flags and path.
     ppc620-*-*	    | \
     powerpc64*-*-*)
 	mpi_sflags="-Wa,-mppc"
 	path="powerpc64"
         mpi_cpu_arch="ppc"
 	;;
     powerpc*-*-linux*)
 	echo '/* configured for powerpc/ELF */' >>./mpi/asm-syntax.h
 	echo '#define ELF_SYNTAX'               >>./mpi/asm-syntax.h
 	cat   $srcdir/mpi/powerpc32/syntax.h	>>./mpi/asm-syntax.h
 	path="powerpc32"
 	mpi_cpu_arch="ppc"
 	;;
 
     rs6000-*-aix[456789]*    | \
     rs6000-*-aix3.2.[456789])
 	mpi_sflags="-Wa,-mpwr"
 	path="power"
 	mpi_extra_modules="udiv-w-sdiv"
 	mpi_cpu_arch="ppc"
 	;;
     rs6000-*-* | \
     power-*-*  | \
     power2-*-*)
 	mpi_sflags="-Wa,-mppc"
 	path="power"
 	mpi_extra_modules="udiv-w-sdiv"
         mpi_cpu_arch="ppc"
 	;;
     powerpc-ibm-aix4.2.* )
 	# I am not sure about this one but a machine identified by
 	# powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code.
 	mpi_sflags="-Wa,-mpwr"
 	path="power"
 	mpi_extra_modules="udiv-w-sdiv"
         mpi_cpu_arch="ppc"
 	;;
     ppc601-*-*)
 	mpi_sflags="-Wa,-mppc"
 	path="power powerpc32"
         mpi_cpu_arch="ppc"
 	;;
     ppc60[234]*-*-*)
 	mpi_sflags="-Wa,-mppc"
 	path="powerpc32"
         mpi_cpu_arch="ppc"
 	;;
     powerpc*-*-*)
 	mpi_sflags="-Wa,-mppc"
 	path="powerpc32"
         mpi_cpu_arch="ppc"
 	;;
     # Anything else: no assembler modules.  mpi_cpu_arch is not set in
     # this arm.
     *)
 	echo '/* Platform not known */' >>./mpi/asm-syntax.h
 	path=""
 	;;
 esac
 
-# If asm modules are disabled reset the found variables but keep
-# mpi_cpu_arch.
+# If asm modules are disabled reset the found variables and mark
+# mpi_cpu_arch as "disabled".
 if test "$try_asm_modules" != "yes" ; then
     echo '/* Assembler modules disabled on request */' >./mpi/asm-syntax.h
     path=""
     mpi_sflags=""
     mpi_extra_modules=""
     mpi_cpu_arch="disabled"
 fi
 
 # Make sure that mpi_cpu_arch is not the empty string.
 if test x"$mpi_cpu_arch" = x ; then
     mpi_cpu_arch="unknown"
 fi
 
 # Add .note.gnu.property section for Intel CET in assembler sources
-# when CET is enabled.  */
+# when CET is enabled.
 if test x"$mpi_cpu_arch" = xx86 ; then
     cat <<EOF >> ./mpi/asm-syntax.h
 
 #if defined(__ASSEMBLER__) && defined(__CET__)
 # include <cet.h>
 #endif
 EOF
 fi
 
 # Make sysdep.h
 echo '/* created by config.links - do not edit */' >./mpi/sysdep.h
 if test x$ac_cv_sys_symbol_underscore = xyes; then
     cat <<EOF >>./mpi/sysdep.h
 #if __STDC__
 #define C_SYMBOL_NAME(name) _##name
 #else
 #define C_SYMBOL_NAME(name) _/**/name
 #endif
 EOF
 else
     cat <<EOF >>./mpi/sysdep.h
 #define C_SYMBOL_NAME(name) name
 EOF
 fi
 
 
 # Figure the required modules out
 mpi_required_modules=$mpi_standard_modules
 if test "$mpi_extra_modules" != ""; then
   for fn in $mpi_extra_modules; do
       for i in $mpi_optional_modules; do
           if test "$fn" = "$i" ; then
              mpi_required_modules="$mpi_required_modules $fn"
           fi
       done
   done
 fi
 
 # Try to get file to link from the assembler subdirectory and
 # if this fails get it from the generic subdirectory.
 mpi_ln_list=
 mpi_mod_list=
 path=`echo "$mpi_extra_path $path generic" | tr ':' ' '`
 echo '/* Created by config.links - do not edit */' >./mpi/mod-source-info.h
 echo "/* Host: ${host} */" >>./mpi/mod-source-info.h
 echo "static char mod_source_info[] =" >>./mpi/mod-source-info.h
 for fn in $mpi_required_modules ; do
     fnu=`echo $fn | sed 's/-/_/g'`
     eval mpi_mod_c_${fnu}=no
     eval mpi_mod_asm_${fnu}=no
     for dir in $path ; do
 	rm -f $srcdir/mpi/$fn.[Sc]
 	if test -f $srcdir/mpi/$dir/$fn.S ; then
             echo "  \":$dir/$fn.S\"" >>./mpi/mod-source-info.h
             mpi_ln_list="$mpi_ln_list mpi/$fn-asm.S:mpi/$dir/$fn.S"
             eval mpi_mod_asm_${fnu}=yes
             mpi_mod_list="$mpi_mod_list $fn"
 	    break;
 	elif test -f $srcdir/mpi/$dir/$fn.c ; then
             echo "  \":$dir/$fn.c\"" >>./mpi/mod-source-info.h
             mpi_ln_list="$mpi_ln_list mpi/$fn.c:mpi/$dir/$fn.c"
             eval mpi_mod_c_${fnu}=yes
             mpi_mod_list="$mpi_mod_list $fn"
 	    break;
 	fi
     done
 done
 echo "  ;" >>./mpi/mod-source-info.h
 
 # Same thing for the file which defines the limb size
 path=`echo "$path generic" | tr ':' ' '`
 for dir in $path ; do
     rm -f $srcdir/mpi/mpi-asm-defs.h
     if test -f $srcdir/mpi/$dir/mpi-asm-defs.h ; then
         mpi_ln_list="$mpi_ln_list mpi/mpi-asm-defs.h:mpi/$dir/mpi-asm-defs.h"
         break;
     fi
 done
diff --git a/mpi/generic/mpih-add1.c b/mpi/generic/mpih-add1.c
index 4a84df64..0a51f06c 100644
--- a/mpi/generic/mpih-add1.c
+++ b/mpi/generic/mpih-add1.c
@@ -1,65 +1,65 @@
 /* mpihelp-add_1.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1997, 1998, 
  *               2000, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 mpi_limb_t
 _gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
                   mpi_ptr_t s2_ptr, mpi_size_t size)
 {
   mpi_limb_t x, y, cy;
   mpi_size_t j;
 
   /* The loop counter and index J goes from -SIZE to -1.  This way
      the loop becomes faster.  */
   j = -size;
 
   /* Offset the base pointers to compensate for the negative indices. */
   s1_ptr -= j;
   s2_ptr -= j;
   res_ptr -= j;
 
   cy = 0;
   do 
     {
       y = s2_ptr[j];
       x = s1_ptr[j];
       y += cy;		  /* add previous carry to one addend */
       cy = y < cy;	  /* get out carry from that addition */
       y += x; 		  /* add other addend */
       cy += y < x;	  /* get out carry from that add, combine */
       res_ptr[j] = y;
     } 
   while ( ++j );
 
   return cy;
 }
 
diff --git a/mpi/generic/mpih-lshift.c b/mpi/generic/mpih-lshift.c
index f48c12cd..8a9c1257 100644
--- a/mpi/generic/mpih-lshift.c
+++ b/mpi/generic/mpih-lshift.c
@@ -1,68 +1,68 @@
 /* mpi-lshift.c  - MPI helper functions
  * Copyright (C) 1994, 1996, 1998, 2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 
 /* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
  * and store the USIZE least significant digits of the result at WP.
  * Return the bits shifted out from the most significant digit.
  *
  * Argument constraints:
- * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
  * 2. If the result is to be written over the input, WP must be >= UP.
  */
 
 mpi_limb_t
 _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
 					    unsigned int cnt)
 {
   mpi_limb_t high_limb, low_limb;
   unsigned sh_1, sh_2;
   mpi_size_t i;
   mpi_limb_t retval;
 
   sh_1 = cnt;
   wp += 1;
   sh_2 = BITS_PER_MPI_LIMB - sh_1;
   i = usize - 1;
   low_limb = up[i];
   retval = low_limb >> sh_2;
   high_limb = low_limb;
   while ( --i >= 0 ) 
     {
       low_limb = up[i];
       wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
       high_limb = low_limb;
     }
   wp[i] = high_limb << sh_1;
 
   return retval;
 }
 
 
diff --git a/mpi/generic/mpih-mul1.c b/mpi/generic/mpih-mul1.c
index 0e8197d8..e88be914 100644
--- a/mpi/generic/mpih-mul1.c
+++ b/mpi/generic/mpih-mul1.c
@@ -1,62 +1,62 @@
 /* mpihelp-mul_1.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1997, 1998, 2001,
  *               2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 mpi_limb_t
 _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 						    mpi_limb_t s2_limb)
 {
   mpi_limb_t cy_limb;
   mpi_size_t j;
   mpi_limb_t prod_high, prod_low;
 
   /* The loop counter and index J goes from -S1_SIZE to -1.  This way
    * the loop becomes faster.  */
   j = -s1_size;
 
   /* Offset the base pointers to compensate for the negative indices.  */
   s1_ptr -= j;
   res_ptr -= j;
 
   cy_limb = 0;
   do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
       prod_low += cy_limb;
       cy_limb = (prod_low < cy_limb?1:0) + prod_high;
       res_ptr[j] = prod_low;
     } 
   while( ++j );
 
   return cy_limb;
 }
 
diff --git a/mpi/generic/mpih-mul2.c b/mpi/generic/mpih-mul2.c
index 3b754960..bc925e01 100644
--- a/mpi/generic/mpih-mul2.c
+++ b/mpi/generic/mpih-mul2.c
@@ -1,68 +1,68 @@
 /* mpih-mul2.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1997, 1998, 2001,
  *               2002  Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 
 mpi_limb_t
 _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 		  mpi_size_t s1_size, mpi_limb_t s2_limb)
 {
   mpi_limb_t cy_limb;
   mpi_size_t j;
   mpi_limb_t prod_high, prod_low;
   mpi_limb_t x;
 
   /* The loop counter and index J goes from -SIZE to -1.  This way
    * the loop becomes faster.  */
   j = -s1_size;
   res_ptr -= j;
   s1_ptr -= j;
 
   cy_limb = 0;
   do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
 
       prod_low += cy_limb;
       cy_limb = (prod_low < cy_limb?1:0) + prod_high;
 
       x = res_ptr[j];
       prod_low = x + prod_low;
       cy_limb += prod_low < x?1:0;
       res_ptr[j] = prod_low;
     } 
   while ( ++j );
     
   return cy_limb;
 }
 
 
diff --git a/mpi/generic/mpih-mul3.c b/mpi/generic/mpih-mul3.c
index 5e84f94f..4ecd2c6b 100644
--- a/mpi/generic/mpih-mul3.c
+++ b/mpi/generic/mpih-mul3.c
@@ -1,68 +1,68 @@
 /* mpih-mul3.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1997, 1998, 2001,
  *               2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 
 mpi_limb_t
 _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 		  mpi_size_t s1_size, mpi_limb_t s2_limb)
 {
   mpi_limb_t cy_limb;
   mpi_size_t j;
   mpi_limb_t prod_high, prod_low;
   mpi_limb_t x;
 
   /* The loop counter and index J goes from -SIZE to -1.  This way
    * the loop becomes faster.  */
   j = -s1_size;
   res_ptr -= j;
   s1_ptr -= j;
 
   cy_limb = 0;
   do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb);
 
       prod_low += cy_limb;
       cy_limb = (prod_low < cy_limb?1:0) + prod_high;
 
       x = res_ptr[j];
       prod_low = x - prod_low;
       cy_limb += prod_low > x?1:0;
       res_ptr[j] = prod_low;
     } 
   while( ++j );
 
   return cy_limb;
 }
 
 
diff --git a/mpi/generic/mpih-rshift.c b/mpi/generic/mpih-rshift.c
index e40794fc..156337b3 100644
--- a/mpi/generic/mpih-rshift.c
+++ b/mpi/generic/mpih-rshift.c
@@ -1,67 +1,67 @@
 /* mpih-rshift.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1998, 1999,
  *               2000, 2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 
 
 /* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
  * and store the USIZE least significant limbs of the result at WP.
  * The bits shifted out to the right are returned.
  *
  * Argument constraints:
- * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
  * 2. If the result is to be written over the input, WP must be <= UP.
  */
 
 mpi_limb_t
 _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
 {
   mpi_limb_t high_limb, low_limb;
   unsigned sh_1, sh_2;
   mpi_size_t i;
   mpi_limb_t retval;
 
   sh_1 = cnt;
   wp -= 1;
   sh_2 = BITS_PER_MPI_LIMB - sh_1;
   high_limb = up[0];
   retval = high_limb << sh_2;
   low_limb = high_limb;
   for (i=1; i < usize; i++)
     {
       high_limb = up[i];
       wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
       low_limb = high_limb;
     }
   wp[i] = low_limb >> sh_1;
 
   return retval;
 }
 
diff --git a/mpi/generic/mpih-sub1.c b/mpi/generic/mpih-sub1.c
index e88821bf..b8eff0db 100644
--- a/mpi/generic/mpih-sub1.c
+++ b/mpi/generic/mpih-sub1.c
@@ -1,66 +1,66 @@
-/* mpihelp-add_2.c  -  MPI helper functions
+/* mpih-sub1.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1997, 1998, 2001,
  *               2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 mpi_limb_t
 _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 				  mpi_ptr_t s2_ptr, mpi_size_t size)
 {
   mpi_limb_t x, y, cy;
   mpi_size_t j;
 
   /* The loop counter and index J goes from -SIZE to -1.  This way
      the loop becomes faster.  */
   j = -size;
 
   /* Offset the base pointers to compensate for the negative indices.  */
   s1_ptr -= j;
   s2_ptr -= j;
   res_ptr -= j;
 
   cy = 0;
   do 
     {
       y = s2_ptr[j];
       x = s1_ptr[j];
       y += cy;		  /* add previous carry to subtrahend */
       cy = y < cy;		  /* get out carry from that addition */
       y = x - y;		  /* main subtract */
       cy += y > x;		  /* get out carry from the subtract, combine */
       res_ptr[j] = y;
     } 
   while( ++j );
 
   return cy;
 }
 
 
diff --git a/mpi/generic/udiv-w-sdiv.c b/mpi/generic/udiv-w-sdiv.c
index e80d98bc..719d1826 100644
--- a/mpi/generic/udiv-w-sdiv.c
+++ b/mpi/generic/udiv-w-sdiv.c
@@ -1,133 +1,133 @@
 /* mpih-w-sdiv -- implement udiv_qrnnd on machines with only signed
  *	      	  division.
  * Copyright (C) 1992, 1994, 1996, 1998, 2002 Free Software Foundation, Inc.
  * Contributed by Peter L. Montgomery.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 
 #if 0  /* not yet ported to MPI */
 
 mpi_limb_t
 mpihelp_udiv_w_sdiv( mpi_limp_t *rp,
 		     mpi_limp_t *a1,
 		     mpi_limp_t *a0,
 		     mpi_limp_t *d   )
 {
   mp_limb_t q, r;
   mp_limb_t c0, c1, b1;
 
   if ((mpi_limb_signed_t) d >= 0)
     {
       if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
 	{
 	  /* dividend, divisor, and quotient are nonnegative */
 	  sdiv_qrnnd (q, r, a1, a0, d);
 	}
       else
 	{
 	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
 	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
 	  /* Divide (c1*2^32 + c0) by d */
 	  sdiv_qrnnd (q, r, c1, c0, d);
 	  /* Add 2^31 to quotient */
 	  q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
 	}
     }
   else
     {
       b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
       c1 = a1 >> 1;			/* A/2 */
       c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
 
       if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
 	{
 	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
 
 	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
 	  if ((d & 1) != 0)
 	    {
 	      if (r >= q)
 		r = r - q;
 	      else if (q - r <= d)
 		{
 		  r = r - q + d;
 		  q--;
 		}
 	      else
 		{
 		  r = r - q + 2*d;
 		  q -= 2;
 		}
 	    }
 	}
       else if (c1 < b1) 		/* So 2^31 <= (A/2)/b1 < 2^32 */
 	{
 	  c1 = (b1 - 1) - c1;
 	  c0 = ~c0;			/* logical NOT */
 
 	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
 
 	  q = ~q;			/* (A/2)/b1 */
 	  r = (b1 - 1) - r;
 
 	  r = 2*r + (a0 & 1);		/* A/(2*b1) */
 
 	  if ((d & 1) != 0)
 	    {
 	      if (r >= q)
 		r = r - q;
 	      else if (q - r <= d)
 		{
 		  r = r - q + d;
 		  q--;
 		}
 	      else
 		{
 		  r = r - q + 2*d;
 		  q -= 2;
 		}
 	    }
 	}
       else				/* Implies c1 = b1 */
 	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
 	  if (a0 >= -d)
 	    {
 	      q = -1;
 	      r = a0 + d;
 	    }
 	  else
 	    {
 	      q = -2;
 	      r = a0 + 2*d;
 	    }
 	}
     }
 
   *rp = r;
   return q;
 }
 
 #endif
 
diff --git a/mpi/hppa/mpih-add1.S b/mpi/hppa/mpih-add1.S
index 3bc0e5e1..8001bda3 100644
--- a/mpi/hppa/mpih-add1.S
+++ b/mpi/hppa/mpih-add1.S
@@ -1,70 +1,70 @@
 /* hppa add_n -- Add two limb vectors of the same length > 0 and store
  *		 sum in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998,
- *                    2001, 2002 Fee Software Foundation, Inc.
+ *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	(gr26)
  *		   mpi_ptr_t s1_ptr,	(gr25)
  *		   mpi_ptr_t s2_ptr,	(gr24)
  *		   mpi_size_t size)	(gr23)
  *
  * One might want to unroll this as for other processors, but it turns
  * out that the data cache contention after a store makes such
  * unrolling useless.  We can't come under 5 cycles/limb anyway.
  */
 
 	.code
 	.export 	_gcry_mpih_add_n
 	.label		_gcry_mpih_add_n
 	.proc
 	.callinfo	frame=0,no_calls
 	.entry
 
 	ldws,ma 	4(0,%r25),%r20
 	ldws,ma 	4(0,%r24),%r19
 
 	addib,= 	-1,%r23,L$end	; check for (SIZE == 1)
 	 add		%r20,%r19,%r28	; add first limbs ignoring cy
 
 	.label L$loop
 	ldws,ma 	4(0,%r25),%r20
 	ldws,ma 	4(0,%r24),%r19
 	stws,ma 	%r28,4(0,%r26)
 	addib,<>	-1,%r23,L$loop
 	 addc		%r20,%r19,%r28
 
 	.label L$end
 	stws		%r28,0(0,%r26)
 	bv		0(%r2)
 	 addc		%r0,%r0,%r28
 
 	.exit
 	.procend
diff --git a/mpi/hppa/mpih-lshift.S b/mpi/hppa/mpih-lshift.S
index 91b29bb6..ba478ff2 100644
--- a/mpi/hppa/mpih-lshift.S
+++ b/mpi/hppa/mpih-lshift.S
@@ -1,77 +1,77 @@
 /* hppa   lshift
  *
  *      Copyright (C) 1992, 1994, 1998
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(gr26)
  *		   mpi_ptr_t up,	(gr25)
  *		   mpi_size_t usize,	(gr24)
  *		   unsigned cnt)	(gr23)
  */
 
 	.code
 	.export 	_gcry_mpih_lshift
 	.label		_gcry_mpih_lshift
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	sh2add		%r24,%r25,%r25
 	sh2add		%r24,%r26,%r26
 	ldws,mb 	-4(0,%r25),%r22
 	subi		32,%r23,%r1
 	mtsar		%r1
 	addib,= 	-1,%r24,L$0004
 	vshd		%r0,%r22,%r28		; compute carry out limb
 	ldws,mb 	-4(0,%r25),%r29
 	addib,= 	-1,%r24,L$0002
 	vshd		%r22,%r29,%r20
 
 	.label	L$loop
 	ldws,mb 	-4(0,%r25),%r22
 	stws,mb 	%r20,-4(0,%r26)
 	addib,= 	-1,%r24,L$0003
 	vshd		%r29,%r22,%r20
 	ldws,mb 	-4(0,%r25),%r29
 	stws,mb 	%r20,-4(0,%r26)
 	addib,<>	-1,%r24,L$loop
 	vshd		%r22,%r29,%r20
 
 	.label	L$0002
 	stws,mb 	%r20,-4(0,%r26)
 	vshd		%r29,%r0,%r20
 	bv		0(%r2)
 	stw		%r20,-4(0,%r26)
 	.label	L$0003
 	stws,mb 	%r20,-4(0,%r26)
 	.label	L$0004
 	vshd		%r22,%r0,%r20
 	bv		0(%r2)
 	stw		%r20,-4(0,%r26)
 
 	.exit
 	.procend
 
 
 
diff --git a/mpi/hppa/mpih-rshift.S b/mpi/hppa/mpih-rshift.S
index 37a9d4ef..c486fc75 100644
--- a/mpi/hppa/mpih-rshift.S
+++ b/mpi/hppa/mpih-rshift.S
@@ -1,73 +1,73 @@
 /* hppa   rshift
  *
  *      Copyright (C) 1992, 1994, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,       (gr26)
  *		   mpi_ptr_t up,       (gr25)
  *		   mpi_size_t usize,   (gr24)
  *		   unsigned cnt)       (gr23)
  */
 
 	.code
 	.export 	_gcry_mpih_rshift
 	.label		_gcry_mpih_rshift
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	ldws,ma 	4(0,%r25),%r22
 	mtsar		%r23
 	addib,= 	-1,%r24,L$r004
 	vshd		%r22,%r0,%r28		; compute carry out limb
 	ldws,ma 	4(0,%r25),%r29
 	addib,= 	-1,%r24,L$r002
 	vshd		%r29,%r22,%r20
 
 	.label	L$roop
 	ldws,ma 	4(0,%r25),%r22
 	stws,ma 	%r20,4(0,%r26)
 	addib,= 	-1,%r24,L$r003
 	vshd		%r22,%r29,%r20
 	ldws,ma 	4(0,%r25),%r29
 	stws,ma 	%r20,4(0,%r26)
 	addib,<>	-1,%r24,L$roop
 	vshd		%r29,%r22,%r20
 
 	.label	L$r002
 	stws,ma 	%r20,4(0,%r26)
 	vshd		%r0,%r29,%r20
 	bv		0(%r2)
 	stw		%r20,0(0,%r26)
 	.label	L$r003
 	stws,ma 	%r20,4(0,%r26)
 	.label	L$r004
 	vshd		%r0,%r22,%r20
 	bv		0(%r2)
 	stw		%r20,0(0,%r26)
 
 	.exit
 	.procend
 
diff --git a/mpi/hppa/mpih-sub1.S b/mpi/hppa/mpih-sub1.S
index 8d197e41..25056328 100644
--- a/mpi/hppa/mpih-sub1.S
+++ b/mpi/hppa/mpih-sub1.S
@@ -1,78 +1,78 @@
 /* hppa   sub_n -- Sub two limb vectors of the same length > 0 and store
- *		   sum in a third limb vector.
+ *		   difference in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(gr26)
  *		   mpi_ptr_t s1_ptr,	(gr25)
  *		   mpi_ptr_t s2_ptr,	(gr24)
  *		   mpi_size_t size)	(gr23)
  *
  * One might want to unroll this as for other processors, but it turns
  * out that the data cache contention after a store makes such
  * unrolling useless.  We can't come under 5 cycles/limb anyway.
  */
 
 
 	.code
 	.export 	_gcry_mpih_sub_n
 	.label		_gcry_mpih_sub_n
 	.proc
 	.callinfo	frame=0,no_calls
 	.entry
 
 	ldws,ma 	4(0,%r25),%r20
 	ldws,ma 	4(0,%r24),%r19
 
 	addib,= 	-1,%r23,L$end	; check for (SIZE == 1)
 	 sub		%r20,%r19,%r28	; subtract first limbs ignoring cy
 
 	.label	L$loop
 	ldws,ma 	4(0,%r25),%r20
 	ldws,ma 	4(0,%r24),%r19
 	stws,ma 	%r28,4(0,%r26)
 	addib,<>	-1,%r23,L$loop
 	 subb		%r20,%r19,%r28
 
 	.label	L$end
 	stws		%r28,0(0,%r26)
 	addc		%r0,%r0,%r28
 	bv		0(%r2)
 	 subi		1,%r28,%r28
 
 	.exit
 	.procend
 
 
 
diff --git a/mpi/hppa/udiv-qrnnd.S b/mpi/hppa/udiv-qrnnd.S
index 59ebf7a0..06f06d15 100644
--- a/mpi/hppa/udiv-qrnnd.S
+++ b/mpi/hppa/udiv-qrnnd.S
@@ -1,297 +1,297 @@
 /* HP-PA  __udiv_qrnnd division support, used from longlong.h.
  *	  This version runs fast on pre-PA7000 CPUs.
  *
  *      Copyright (C) 1993, 1994, 1998, 2001,
  *                     2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 /* INPUT PARAMETERS
  *   rem_ptr	   gr26
  *   n1 	   gr25
  *   n0 	   gr24
  *   d		   gr23
  *
  *   The code size is a bit excessive.	We could merge the last two ds;addc
  *   sequences by simply moving the "bb,< Odd" instruction down.  The only
  *   trouble is the FFFFFFFF code that would need some hacking.
  */
 
 	.code
 	.export 	__udiv_qrnnd
 	.label		__udiv_qrnnd
 	.proc
 	.callinfo	frame=0,no_calls
 	.entry
 
 	comb,<		%r23,0,L$largedivisor
 	 sub		%r0,%r23,%r1		; clear cy as side-effect
 	ds		%r0,%r1,%r0
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r23,%r25
 	addc		%r24,%r24,%r28
 	ds		%r25,%r23,%r25
 	comclr,>=	%r25,%r0,%r0
 	addl		%r25,%r23,%r25
 	stws		%r25,0(0,%r26)
 	bv		0(%r2)
 	 addc		%r28,%r28,%r28
 
 	.label	L$largedivisor
 	extru		%r24,31,1,%r19		; r19 = n0 & 1
 	bb,<		%r23,31,L$odd
 	 extru		%r23,30,31,%r22 	; r22 = d >> 1
 	shd		%r25,%r24,1,%r24	; r24 = new n0
 	extru		%r25,30,31,%r25 	; r25 = new n1
 	sub		%r0,%r22,%r21
 	ds		%r0,%r21,%r0
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	comclr,>=	%r25,%r0,%r0
 	addl		%r25,%r22,%r25
 	sh1addl 	%r25,%r19,%r25
 	stws		%r25,0(0,%r26)
 	bv		0(%r2)
 	 addc		%r24,%r24,%r28
 
 	.label	L$odd
 	addib,sv,n	1,%r22,L$FF..		; r22 = (d / 2 + 1)
 	shd		%r25,%r24,1,%r24	; r24 = new n0
 	extru		%r25,30,31,%r25 	; r25 = new n1
 	sub		%r0,%r22,%r21
 	ds		%r0,%r21,%r0
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r24
 	ds		%r25,%r22,%r25
 	addc		%r24,%r24,%r28
 	comclr,>=	%r25,%r0,%r0
 	addl		%r25,%r22,%r25
 	sh1addl 	%r25,%r19,%r25
 ; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
 	add,nuv 	%r28,%r25,%r25
 	addl		%r25,%r1,%r25
 	addc		%r0,%r28,%r28
 	sub,<<		%r25,%r23,%r0
 	addl		%r25,%r1,%r25
 	stws		%r25,0(0,%r26)
 	bv		0(%r2)
 	 addc		%r0,%r28,%r28
 
 ; This is just a special case of the code above.
 ; We come here when d == 0xFFFFFFFF
 	.label	L$FF..
 	add,uv		%r25,%r24,%r24
 	sub,<<		%r24,%r23,%r0
 	ldo		1(%r24),%r24
 	stws		%r24,0(0,%r26)
 	bv		0(%r2)
 	 addc		%r0,%r25,%r28
 
 	.exit
 	.procend
diff --git a/mpi/hppa1.1/mpih-mul1.S b/mpi/hppa1.1/mpih-mul1.S
index 45926dd7..b474f27c 100644
--- a/mpi/hppa1.1/mpih-mul1.S
+++ b/mpi/hppa1.1/mpih-mul1.S
@@ -1,115 +1,115 @@
 /* hppa1.1  mul_1 -- Multiply a limb vector with a limb and store
  *		     the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1993, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(r26)
  *		  mpi_ptr_t s1_ptr,	(r25)
  *		  mpi_size_t s1_size,	(r24)
  *		  mpi_limb_t s2_limb)	(r23)
  *
  *
  *
  * This runs at 9 cycles/limb on a PA7000.  With the used instructions, it can
  * not become faster due to data cache contention after a store.  On the
  * PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
  * only the xmpyu does not need the integer pipeline, so the only dual-issue
  * we will get are addc+xmpyu.	Unrolling would not help either CPU.
  *
  * We could use fldds to read two limbs at a time from the S1 array, and that
  * could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
  * PA7100, respectively.  We don't do that since it does not seem worth the
  * (alignment) troubles...
  *
  * At least the PA7100 is rumored to be able to deal with cache-misses
  * without stalling instruction issue.	If this is true, and the cache is
  * actually also lockup-free, we should use a deeper software pipeline, and
  * load from S1 very early!  (The loads and stores to -12(sp) will surely be
  * in the cache.)
  */
 
 	.level		1.1
 
 	.code
 	.export 	_gcry_mpih_mul_1
 	.label		_gcry_mpih_mul_1
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	ldo		64(%r30),%r30
 	fldws,ma	4(%r25),%fr5
 	stw		%r23,-16(%r30)		; move s2_limb ...
 	addib,= 	-1,%r24,L$just_one_limb
 	 fldws		-16(%r30),%fr4		; ... into fr4
 	add		%r0,%r0,%r0		; clear carry
 	xmpyu		%fr4,%fr5,%fr6
 	fldws,ma	4(%r25),%fr7
 	fstds		%fr6,-16(%r30)
 	xmpyu		%fr4,%fr7,%fr8
 	ldw		-12(%r30),%r19		; least significant limb in product
 	ldw		-16(%r30),%r28
 
 	fstds		%fr8,-16(%r30)
 	addib,= 	-1,%r24,L$end
 	 ldw		-12(%r30),%r1
 
 ; Main loop
 	.label	L$loop
 	fldws,ma	4(%r25),%fr5
 	stws,ma 	%r19,4(%r26)
 	addc		%r28,%r1,%r19
 	xmpyu		%fr4,%fr5,%fr6
 	ldw		-16(%r30),%r28
 	fstds		%fr6,-16(%r30)
 	addib,<>	-1,%r24,L$loop
 	 ldw		-12(%r30),%r1
 
 	.label	L$end
 	stws,ma 	%r19,4(%r26)
 	addc		%r28,%r1,%r19
 	ldw		-16(%r30),%r28
 	stws,ma 	%r19,4(%r26)
 	addc		%r0,%r28,%r28
 	bv		0(%r2)
 	 ldo		-64(%r30),%r30
 
 	.label	L$just_one_limb
 	xmpyu		%fr4,%fr5,%fr6
 	fstds		%fr6,-16(%r30)
 	ldw		-16(%r30),%r28
 	ldo		-64(%r30),%r30
 	bv		0(%r2)
 	 fstws		%fr6R,0(%r26)
 
 	.exit
 	.procend
 
 
diff --git a/mpi/hppa1.1/mpih-mul2.S b/mpi/hppa1.1/mpih-mul2.S
index 1047ab56..4ba89714 100644
--- a/mpi/hppa1.1/mpih-mul2.S
+++ b/mpi/hppa1.1/mpih-mul2.S
@@ -1,117 +1,117 @@
 /* hppa1.1   addmul_1 -- Multiply a limb vector with a limb and add
  *			 the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1993, 1994, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (r26)
  *		     mpi_ptr_t s1_ptr,	     (r25)
  *		     mpi_size_t s1_size,     (r24)
  *		     mpi_limb_t s2_limb)     (r23)
  *
  * This runs at 11 cycles/limb on a PA7000.  With the used instructions, it
  * can not become faster due to data cache contention after a store.  On the
  * PA7100 it runs at 10 cycles/limb, and that can not be improved either,
  * since only the xmpyu does not need the integer pipeline, so the only
  * dual-issue we will get are addc+xmpyu.  Unrolling could gain a cycle/limb
  * on the PA7100.
  *
- * There are some ideas described in mul1.S that applies to this code too.
+ * There are some ideas described in mpih-mul1.S that apply to this code too.
  */
 
 	.level		1.1
 
 	.code
 	.export 	_gcry_mpih_addmul_1
 	.label		_gcry_mpih_addmul_1
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	ldo		64(%r30),%r30
 	fldws,ma	4(%r25),%fr5
 	stw		%r23,-16(%r30)		; move s2_limb ...
 	addib,= 	-1,%r24,L$just_one_limb
 	 fldws		-16(%r30),%fr4		; ... into fr4
 	add		%r0,%r0,%r0		; clear carry
 	xmpyu		%fr4,%fr5,%fr6
 	fldws,ma	4(%r25),%fr7
 	fstds		%fr6,-16(%r30)
 	xmpyu		%fr4,%fr7,%fr8
 	ldw		-12(%r30),%r19		; least significant limb in product
 	ldw		-16(%r30),%r28
 
 	fstds		%fr8,-16(%r30)
 	addib,= 	-1,%r24,L$end
 	 ldw		-12(%r30),%r1
 
 ; Main loop
 	.label	L$loop
 	ldws		0(%r26),%r29
 	fldws,ma	4(%r25),%fr5
 	add		%r29,%r19,%r19
 	stws,ma 	%r19,4(%r26)
 	addc		%r28,%r1,%r19
 	xmpyu		%fr4,%fr5,%fr6
 	ldw		-16(%r30),%r28
 	fstds		%fr6,-16(%r30)
 	addc		%r0,%r28,%r28
 	addib,<>	-1,%r24,L$loop
 	 ldw		-12(%r30),%r1
 
 	.label	L$end
 	ldw		0(%r26),%r29
 	add		%r29,%r19,%r19
 	stws,ma 	%r19,4(%r26)
 	addc		%r28,%r1,%r19
 	ldw		-16(%r30),%r28
 	ldws		0(%r26),%r29
 	addc		%r0,%r28,%r28
 	add		%r29,%r19,%r19
 	stws,ma 	%r19,4(%r26)
 	addc		%r0,%r28,%r28
 	bv		0(%r2)
 	 ldo		-64(%r30),%r30
 
 	.label L$just_one_limb
 	xmpyu		%fr4,%fr5,%fr6
 	ldw		0(%r26),%r29
 	fstds		%fr6,-16(%r30)
 	ldw		-12(%r30),%r1
 	ldw		-16(%r30),%r28
 	add		%r29,%r1,%r19
 	stw		%r19,0(%r26)
 	addc		%r0,%r28,%r28
 	bv		0(%r2)
 	 ldo		-64(%r30),%r30
 
 	.exit
 	.procend
 
 
diff --git a/mpi/hppa1.1/mpih-mul3.S b/mpi/hppa1.1/mpih-mul3.S
index 632adf1e..d7cff234 100644
--- a/mpi/hppa1.1/mpih-mul3.S
+++ b/mpi/hppa1.1/mpih-mul3.S
@@ -1,126 +1,126 @@
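-/* hppa1.1   submul_1 -- Multiply a limb vector with a limb and add
- *			 the result to a second limb vector.
+/* hppa1.1   submul_1 -- Multiply a limb vector with a limb and subtract
+ *			 the result from a second limb vector.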
  *
  *      Copyright (C) 1992, 1993, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (r26)
  *		     mpi_ptr_t s1_ptr,	     (r25)
  *		     mpi_size_t s1_size,     (r24)
  *		     mpi_limb_t s2_limb)     (r23)
  *
  *
  * This runs at 12 cycles/limb on a PA7000.  With the used instructions, it
  * can not become faster due to data cache contention after a store.  On the
  * PA7100 it runs at 11 cycles/limb, and that can not be improved either,
  * since only the xmpyu does not need the integer pipeline, so the only
  * dual-issue we will get are addc+xmpyu.  Unrolling could gain a cycle/limb
  * on the PA7100.
  *
- * There are some ideas described in mul1.S that applies to this code too.
+ * There are some ideas described in mpih-mul1.S that apply to this code too.
  *
  * It seems possible to make this run as fast as addmul_1, if we use
  *	 sub,>>= %r29,%r19,%r22
  *	 addi	 1,%r28,%r28
  * but that requires reworking the hairy software pipeline...
  */
 
 	.level		1.1
 
 	.code
 	.export 	_gcry_mpih_submul_1
 	.label		_gcry_mpih_submul_1
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	ldo		64(%r30),%r30
 	fldws,ma	4(%r25),%fr5
 	stw		%r23,-16(%r30)		; move s2_limb ...
 	addib,= 	-1,%r24,L$just_one_limb
 	 fldws		-16(%r30),%fr4		; ... into fr4
 	add		%r0,%r0,%r0		; clear carry
 	xmpyu		%fr4,%fr5,%fr6
 	fldws,ma	4(%r25),%fr7
 	fstds		%fr6,-16(%r30)
 	xmpyu		%fr4,%fr7,%fr8
 	ldw		-12(%r30),%r19		; least significant limb in product
 	ldw		-16(%r30),%r28
 
 	fstds		%fr8,-16(%r30)
 	addib,= 	-1,%r24,L$end
 	 ldw		-12(%r30),%r1
 
 ; Main loop
 	.label	L$loop
 	ldws		0(%r26),%r29
 	fldws,ma	4(%r25),%fr5
 	sub		%r29,%r19,%r22
 	add		%r22,%r19,%r0
 	stws,ma 	%r22,4(%r26)
 	addc		%r28,%r1,%r19
 	xmpyu		%fr4,%fr5,%fr6
 	ldw		-16(%r30),%r28
 	fstds		%fr6,-16(%r30)
 	addc		%r0,%r28,%r28
 	addib,<>	-1,%r24,L$loop
 	 ldw		-12(%r30),%r1
 
 	.label	L$end
 	ldw		0(%r26),%r29
 	sub		%r29,%r19,%r22
 	add		%r22,%r19,%r0
 	stws,ma 	%r22,4(%r26)
 	addc		%r28,%r1,%r19
 	ldw		-16(%r30),%r28
 	ldws		0(%r26),%r29
 	addc		%r0,%r28,%r28
 	sub		%r29,%r19,%r22
 	add		%r22,%r19,%r0
 	stws,ma 	%r22,4(%r26)
 	addc		%r0,%r28,%r28
 	bv		0(%r2)
 	 ldo		-64(%r30),%r30
 
 	.label	L$just_one_limb
 	xmpyu		%fr4,%fr5,%fr6
 	ldw		0(%r26),%r29
 	fstds		%fr6,-16(%r30)
 	ldw		-12(%r30),%r1
 	ldw		-16(%r30),%r28
 	sub		%r29,%r1,%r22
 	add		%r22,%r1,%r0
 	stw		%r22,0(%r26)
 	addc		%r0,%r28,%r28
 	bv		0(%r2)
 	 ldo		-64(%r30),%r30
 
 	.exit
 	.procend
 
diff --git a/mpi/hppa1.1/udiv-qrnnd.S b/mpi/hppa1.1/udiv-qrnnd.S
index 3f28b7b6..ebb28d6c 100644
--- a/mpi/hppa1.1/udiv-qrnnd.S
+++ b/mpi/hppa1.1/udiv-qrnnd.S
@@ -1,92 +1,92 @@
 /* HP-PA  __udiv_qrnnd division support, used from longlong.h.
  *	  This version runs fast on PA 7000 and later.
  *
  *      Copyright (C) 1993, 1994, 1998, 
  *                    2001, 2002, 2004 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 /* INPUT PARAMETERS
  *   rem_ptr	   gr26
  *   n1 	   gr25
  *   n0 	   gr24
  *   d		   gr23
  */
 
 	.level		1.1
 
         .data
 	.align		8
 	.label L$0000
 	.word		0x43f00000
 	.word		0x0
         .code
 	.export 	__udiv_qrnnd
 	.label		__udiv_qrnnd
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 	ldo		64(%r30),%r30
 
 	stws		%r25,-16(0,%r30)	; n_hi
 	stws		%r24,-12(0,%r30)	; n_lo
         stw             %r19,-32(%r30)
         addil           LT%L$0000,%r19
         ldw             RT%L$0000(%r1),%r1
 	fldds		-16(0,%r30),%fr5
 	stws		%r23,-12(0,%r30)
 	comib,<=	0,%r25,L$1
 	fcnvxf,dbl,dbl	%fr5,%fr5
         fldds           0(0,%r1),%fr4
 	fadd,dbl	%fr4,%fr5,%fr5
 	.label	L$1
 	fcpy,sgl	%fr0,%fr6L
 	fldws		-12(0,%r30),%fr6R
 	fcnvxf,dbl,dbl	%fr6,%fr4
 
 	fdiv,dbl	%fr5,%fr4,%fr5
 
 	fcnvfx,dbl,dbl	%fr5,%fr4
 	fstws		%fr4R,-16(%r30)
 	xmpyu		%fr4R,%fr6R,%fr6
 	ldws		-16(%r30),%r28
 	fstds		%fr6,-16(0,%r30)
 	ldws		-12(0,%r30),%r21
 	ldws		-16(0,%r30),%r20
 	sub		%r24,%r21,%r22
         subb            %r25,%r20,%r1
         comib,=         0,%r1,L$2
 	ldo		-64(%r30),%r30
 
 	add		%r22,%r23,%r22
 	ldo		-1(%r28),%r28
 	.label	L$2
 	bv		0(%r2)
 	stws		%r22,0(0,%r26)
 
 	.exit
 	.procend
 
diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S
index 95a75890..7b458fc6 100644
--- a/mpi/i386/mpih-add1.S
+++ b/mpi/i386/mpih-add1.S
@@ -1,161 +1,161 @@
 /* i80386 add_n -- Add two limb vectors of the same length > 0 and store
  *		   sum in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	(sp + 4)
  *		   mpi_ptr_t s1_ptr,	(sp + 8)
  *		   mpi_ptr_t s2_ptr,	(sp + 12)
  *		   mpi_size_t size)	(sp + 16)
  */
 
 .text
 	ALIGN (3)
 	.globl C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
 	CFI_STARTPROC()
 	pushl %edi
 	CFI_PUSH(%edi)
 	pushl %esi
 	CFI_PUSH(%esi)
 
 	movl 12(%esp),%edi		/* res_ptr */
 	movl 16(%esp),%esi		/* s1_ptr */
 	movl 20(%esp),%edx		/* s2_ptr */
 	movl 24(%esp),%ecx		/* size */
 
 #if defined __CET__ && (__CET__ & 1) != 0
 	pushl	%ebx
 	CFI_PUSH(%ebx)
 #endif
 
 	movl	%ecx,%eax
 	shrl	$3,%ecx 		/* compute count for unrolled loop */
 	negl	%eax
 	andl	$7,%eax 		/* get index where to start loop */
 	jz	Loop			/* necessary special case for 0 */
 	incl	%ecx			/* adjust loop count */
 	shll	$2,%eax 		/* adjustment for pointers... */
 	subl	%eax,%edi		/* ... since they are offset ... */
 	subl	%eax,%esi		/* ... by a constant when we ... */
 	subl	%eax,%edx		/* ... enter the loop */
 	shrl	$2,%eax 		/* restore previous value */
 #if defined __CET__ && (__CET__ & 1) != 0
 	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
 #endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
 	call	L0
 	CFI_ADJUST_CFA_OFFSET(4)
 L0:	leal	(%eax,%eax,8),%eax
 	addl	(%esp),%eax
 	addl	$(Loop-L0-3),%eax
 	addl	$4,%esp
 	CFI_ADJUST_CFA_OFFSET(-4)
 #else
 /* Calculate start address in loop for non-PIC.  */
 	leal	(Loop - 3)(%eax,%eax,8),%eax
 #endif
 #if defined __CET__ && (__CET__ & 1) != 0
 	addl	%ebx,%eax		/* Adjust for endbr32 */
 #endif
 	jmp	*%eax			/* jump into loop */
 	ALIGN (3)
 Loop:	movl	(%esi),%eax
 	adcl	(%edx),%eax
 	movl	%eax,(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	4(%esi),%eax
 	adcl	4(%edx),%eax
 	movl	%eax,4(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	8(%esi),%eax
 	adcl	8(%edx),%eax
 	movl	%eax,8(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	12(%esi),%eax
 	adcl	12(%edx),%eax
 	movl	%eax,12(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	16(%esi),%eax
 	adcl	16(%edx),%eax
 	movl	%eax,16(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	20(%esi),%eax
 	adcl	20(%edx),%eax
 	movl	%eax,20(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	24(%esi),%eax
 	adcl	24(%edx),%eax
 	movl	%eax,24(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	28(%esi),%eax
 	adcl	28(%edx),%eax
 	movl	%eax,28(%edi)
 	leal	32(%edi),%edi
 	leal	32(%esi),%esi
 	leal	32(%edx),%edx
 	decl	%ecx
 	jnz	Loop
 
 	sbbl	%eax,%eax
 	negl	%eax
 
 #if defined __CET__ && (__CET__ & 1) != 0
 	popl	%ebx
 	CFI_POP(%ebx)
 #endif
 
 	popl %esi
 	CFI_POP(%esi)
 	popl %edi
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-lshift.S b/mpi/i386/mpih-lshift.S
index 3404cf55..409c6c59 100644
--- a/mpi/i386/mpih-lshift.S
+++ b/mpi/i386/mpih-lshift.S
@@ -1,102 +1,102 @@
 /* i80386   lshift
  *      Copyright (C) 1992, 1994, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(sp + 4)
  *		   mpi_ptr_t up,	(sp + 8)
  *		   mpi_size_t usize,	(sp + 12)
  *		   unsigned cnt)	(sp + 16)
  */
 
 .text
 	ALIGN (3)
 	.globl C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
 	CFI_STARTPROC()
 	pushl	%edi
 	CFI_PUSH(%edi)
 	pushl	%esi
 	CFI_PUSH(%esi)
 	pushl	%ebx
 	CFI_PUSH(%ebx)
 
 	movl	16(%esp),%edi		/* res_ptr */
 	movl	20(%esp),%esi		/* s_ptr */
 	movl	24(%esp),%edx		/* size */
 	movl	28(%esp),%ecx		/* cnt */
 
 	subl	$4,%esi 		/* adjust s_ptr */
 
 	movl	(%esi,%edx,4),%ebx	/* read most significant limb */
 	xorl	%eax,%eax
 	shldl	%cl,%ebx,%eax		/* compute carry limb */
 	decl	%edx
 	jz	Lend
 	pushl	%eax			/* push carry limb onto stack */
 	testb	$1,%dl
 	jnz	L1			/* enter loop in the middle */
 	movl	%ebx,%eax
 
 	ALIGN (3)
 Loop:	movl	(%esi,%edx,4),%ebx	/* load next lower limb */
 	shldl	%cl,%ebx,%eax		/* compute result limb */
 	movl	%eax,(%edi,%edx,4)	/* store it */
 	decl	%edx
 L1:	movl	(%esi,%edx,4),%eax
 	shldl	%cl,%eax,%ebx
 	movl	%ebx,(%edi,%edx,4)
 	decl	%edx
 	jnz	Loop
 
 	shll	%cl,%eax		/* compute least significant limb */
 	movl	%eax,(%edi)		/* store it */
 
 	popl	%eax			/* pop carry limb */
 
 	popl	%ebx
 	popl	%esi
 	popl	%edi
 	ret_spec_stop
 
 Lend:	shll	%cl,%ebx		/* compute least significant limb */
 	movl	%ebx,(%edi)		/* store it */
 
 	popl	%ebx
 	CFI_POP(%ebx)
 	popl	%esi
 	CFI_POP(%esi)
 	popl	%edi
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul1.S b/mpi/i386/mpih-mul1.S
index a672d052..431730ce 100644
--- a/mpi/i386/mpih-mul1.S
+++ b/mpi/i386/mpih-mul1.S
@@ -1,94 +1,94 @@
 /* i80386 mul_1 -- Multiply a limb vector with a limb and store
  *			 the result in a second limb vector.
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************  Multiply {s1_ptr, s1_size} by s2_limb into res_ptr.
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(sp + 4)
  *		  mpi_ptr_t s1_ptr,	(sp + 8)
  *		  mpi_size_t s1_size,	(sp + 12)
  *		  mpi_limb_t s2_limb)	(sp + 16)
  * Returns (in %eax) the carry limb left after the last product.  */
 /* Registers that hold the arguments; the names are passed through R().  */
 #define res_ptr edi
 #define s1_ptr	esi
 #define size	ecx
 #define s2_limb ebp
 
 	TEXT
 	ALIGN (3)
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 
 	CFI_STARTPROC()
 	INSN1(push,l	,R(edi))	/* save the registers used below */
 	CFI_PUSH(%edi)
 	INSN1(push,l	,R(esi))
 	CFI_PUSH(%esi)
 	INSN1(push,l	,R(ebx))
 	CFI_PUSH(%ebx)
 	INSN1(push,l	,R(ebp))
 	CFI_PUSH(%ebp)
 
 	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))	/* args sit 16 bytes higher after the pushes */
 	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
 	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
 	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
 
 	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* point just past the last limb ... */
 	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
 	INSN1(neg,l	,R(size))	/* ... and index with a negative count running up to 0 */
 	INSN2(xor,l	,R(ebx),R(ebx))	/* carry limb = 0 */
 	ALIGN (3)
 Loop:
 	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))	/* next source limb */
 	INSN1(mul,l	,R(s2_limb))	/* edx:eax = eax * s2_limb */
 	INSN2(add,l	,R(eax),R(ebx))	/* add the carry limb of the previous step */
 	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(eax))	/* store low limb; mov leaves CF alone */
 	INSN2(adc,l	,R(edx),$0)	/* fold CF of the add into the high limb */
 	INSN2(mov,l	,R(ebx),R(edx))	/* high limb becomes the next carry */
 
 	INSN1(inc,l	,R(size))	/* advance the index toward zero */
 	INSN1(jnz,	,Loop)
 	INSN2(mov,l	,R(eax),R(ebx))	/* return value: final carry limb */
 
 	INSN1(pop,l	,R(ebp))
 	CFI_POP(%ebp)
 	INSN1(pop,l	,R(ebx))
 	CFI_POP(%ebx)
 	INSN1(pop,l	,R(esi))
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul2.S b/mpi/i386/mpih-mul2.S
index e09c3f7c..58a0d6e3 100644
--- a/mpi/i386/mpih-mul2.S
+++ b/mpi/i386/mpih-mul2.S
@@ -1,96 +1,96 @@
 /* i80386 addmul_1 -- Multiply a limb vector with a limb and add
  *		      the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************  Add {s1_ptr, s1_size} * s2_limb to the vector at res_ptr.
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (sp + 4)
  *		     mpi_ptr_t s1_ptr,	     (sp + 8)
  *		     mpi_size_t s1_size,     (sp + 12)
  *		     mpi_limb_t s2_limb)     (sp + 16)
  * Returns (in %eax) the carry limb left after the last step.  */
 /* Registers that hold the arguments; the names are passed through R().  */
 #define res_ptr edi
 #define s1_ptr	esi
 #define size	ecx
 #define s2_limb ebp
 
 	TEXT
 	ALIGN (3)
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
 
 	CFI_STARTPROC()
 	INSN1(push,l	,R(edi))	/* save the registers used below */
 	CFI_PUSH(%edi)
 	INSN1(push,l	,R(esi))
 	CFI_PUSH(%esi)
 	INSN1(push,l	,R(ebx))
 	CFI_PUSH(%ebx)
 	INSN1(push,l	,R(ebp))
 	CFI_PUSH(%ebp)
 
 	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))	/* args sit 16 bytes higher after the pushes */
 	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
 	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
 	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
 
 	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* point just past the last limb ... */
 	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
 	INSN1(neg,l	,R(size))	/* ... and index with a negative count running up to 0 */
 	INSN2(xor,l	,R(ebx),R(ebx))	/* carry limb = 0 */
 	ALIGN (3)
 Loop:
 	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))	/* next source limb */
 	INSN1(mul,l	,R(s2_limb))	/* edx:eax = eax * s2_limb */
 	INSN2(add,l	,R(eax),R(ebx))	/* add the carry limb of the previous step */
 	INSN2(adc,l	,R(edx),$0)	/* fold that carry into the high limb */
 	INSN2(add,l	,MEM_INDEX(res_ptr,size,4),R(eax))	/* accumulate into the result limb in memory */
 	INSN2(adc,l	,R(edx),$0)	/* fold the carry of the accumulation as well */
 	INSN2(mov,l	,R(ebx),R(edx))	/* high limb becomes the next carry */
 
 	INSN1(inc,l	,R(size))	/* advance the index toward zero */
 	INSN1(jnz,	,Loop)
 	INSN2(mov,l	,R(eax),R(ebx))	/* return value: final carry limb */
 
 	INSN1(pop,l	,R(ebp))
 	CFI_POP(%ebp)
 	INSN1(pop,l	,R(ebx))
 	CFI_POP(%ebx)
 	INSN1(pop,l	,R(esi))
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul3.S b/mpi/i386/mpih-mul3.S
index 4112c699..327e6ac8 100644
--- a/mpi/i386/mpih-mul3.S
+++ b/mpi/i386/mpih-mul3.S
@@ -1,96 +1,96 @@
 /* i80386 submul_1 -- Multiply a limb vector with a limb and subtract
  *		      the result from a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************  Subtract {s1_ptr, s1_size} * s2_limb from the vector at res_ptr.
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (sp + 4)
  *		     mpi_ptr_t s1_ptr,	     (sp + 8)
  *		     mpi_size_t s1_size,     (sp + 12)
  *		     mpi_limb_t s2_limb)     (sp + 16)
  * Returns (in %eax) the carry/borrow limb left after the last step.  */
 /* Registers that hold the arguments; the names are passed through R().  */
 #define res_ptr edi
 #define s1_ptr	esi
 #define size	ecx
 #define s2_limb ebp
 
 	TEXT
 	ALIGN (3)
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
 
 	CFI_STARTPROC()
 	INSN1(push,l	,R(edi))	/* save the registers used below */
 	CFI_PUSH(%edi)
 	INSN1(push,l	,R(esi))
 	CFI_PUSH(%esi)
 	INSN1(push,l	,R(ebx))
 	CFI_PUSH(%ebx)
 	INSN1(push,l	,R(ebp))
 	CFI_PUSH(%ebp)
 
 	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))	/* args sit 16 bytes higher after the pushes */
 	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
 	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
 	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
 
 	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* point just past the last limb ... */
 	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
 	INSN1(neg,l	,R(size))	/* ... and index with a negative count running up to 0 */
 	INSN2(xor,l	,R(ebx),R(ebx))	/* carry limb = 0 */
 	ALIGN (3)
 Loop:
 	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))	/* next source limb */
 	INSN1(mul,l	,R(s2_limb))	/* edx:eax = eax * s2_limb */
 	INSN2(add,l	,R(eax),R(ebx))	/* add the carry limb of the previous step */
 	INSN2(adc,l	,R(edx),$0)	/* fold that carry into the high limb */
 	INSN2(sub,l	,MEM_INDEX(res_ptr,size,4),R(eax))	/* subtract from the result limb in memory */
 	INSN2(adc,l	,R(edx),$0)	/* a borrow from the sub raises the next carry by one */
 	INSN2(mov,l	,R(ebx),R(edx))	/* high limb becomes the next carry */
 
 	INSN1(inc,l	,R(size))	/* advance the index toward zero */
 	INSN1(jnz,	,Loop)
 	INSN2(mov,l	,R(eax),R(ebx))	/* return value: final carry limb */
 
 	INSN1(pop,l	,R(ebp))
 	CFI_POP(%ebp)
 	INSN1(pop,l	,R(ebx))
 	CFI_POP(%ebx)
 	INSN1(pop,l	,R(esi))
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-rshift.S b/mpi/i386/mpih-rshift.S
index 5d34696c..18c36c0c 100644
--- a/mpi/i386/mpih-rshift.S
+++ b/mpi/i386/mpih-rshift.S
@@ -1,105 +1,105 @@
 /* i80386   rshift
  *
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************  Shift a limb vector right by cnt bits.
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	(sp + 4)
  *		   mpi_ptr_t up,	(sp + 8)
  *		   mpi_size_t usize,	(sp + 12)
  *		   unsigned cnt)	(sp + 16)
  * Returns (in %eax) the bits shifted out of the least significant limb.  */
 
 .text
 	ALIGN (3)
 	.globl C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
 	CFI_STARTPROC()
 	pushl	%edi			/* save the registers used below */
 	CFI_PUSH(%edi)
 	pushl	%esi
 	CFI_PUSH(%esi)
 	pushl	%ebx
 	CFI_PUSH(%ebx)
 
 	movl	16(%esp),%edi		/* wp */
 	movl	20(%esp),%esi		/* up */
 	movl	24(%esp),%edx		/* usize */
 	movl	28(%esp),%ecx		/* cnt */
 
 	leal	-4(%edi,%edx,4),%edi	/* %edi = address of the top result limb */
 	leal	(%esi,%edx,4),%esi	/* %esi = just past the last source limb */
 	negl	%edx			/* negative index that counts up to zero */
 
 	movl	(%esi,%edx,4),%ebx	/* read least significant limb */
 	xorl	%eax,%eax		/* carry limb starts out as zero */
 	shrdl	%cl,%ebx,%eax		/* compute carry limb */
 	incl	%edx			/* one limb has been read */
 	jz	Lend2			/* it was the only limb: no loop needed */
 	pushl	%eax			/* push carry limb onto stack */
 	testb	$1,%dl			/* the loop handles two limbs per pass ... */
 	jnz	L2			/* enter loop in the middle */
 	movl	%ebx,%eax		/* entering at Loop2: it expects the lower limb in %eax */
 
 	ALIGN (3)
 Loop2:	movl	 (%esi,%edx,4),%ebx	/* load next higher limb */
 	shrdl	%cl,%ebx,%eax		/* compute result limb */
 	movl	%eax,(%edi,%edx,4)	/* store it */
 	incl	%edx
 L2:	movl	(%esi,%edx,4),%eax	/* same step with the roles of %eax and %ebx swapped */
 	shrdl	%cl,%eax,%ebx
 	movl	%ebx,(%edi,%edx,4)
 	incl	%edx
 	jnz	Loop2
 
 	shrl	%cl,%eax		/* compute most significant limb */
 	movl	%eax,(%edi)		/* store it */
 
 	popl	%eax			/* pop carry limb */
 
 	popl	%ebx			/* this exit carries no CFI_POP annotations, unlike Lend2 */
 	popl	%esi
 	popl	%edi
 	ret_spec_stop
 
 Lend2:	shrl	%cl,%ebx		/* compute most significant limb */
 	movl	%ebx,(%edi)		/* store it */
 
 	popl	%ebx			/* %eax still holds the carry limb computed above */
 	CFI_POP(%ebx)
 	popl	%esi
 	CFI_POP(%esi)
 	popl	%edi
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S
index 49477ae3..afd4c077 100644
--- a/mpi/i386/mpih-sub1.S
+++ b/mpi/i386/mpih-sub1.S
@@ -1,162 +1,162 @@
 /* i80386 sub_n -- Subtract two limb vectors of the same length > 0 and
  *		   store the difference in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************  res_ptr[] = s1_ptr[] - s2_ptr[] over size limbs.
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(sp + 4)
  *		   mpi_ptr_t s1_ptr,	(sp + 8)
  *		   mpi_ptr_t s2_ptr,	(sp + 12)
  *		   mpi_size_t size)	(sp + 16)
  *  Returns (in %eax) the final borrow as 0 or 1.  */
 
 /* The loop is unrolled 8 times and entered part-way through for the odd limbs. */
 .text
 	ALIGN (3)
 	.globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 	CFI_STARTPROC()
 	pushl %edi
 	CFI_PUSH(%edi)
 	pushl %esi
 	CFI_PUSH(%esi)
 
 	movl 12(%esp),%edi		/* res_ptr */
 	movl 16(%esp),%esi		/* s1_ptr */
 	movl 20(%esp),%edx		/* s2_ptr */
 	movl 24(%esp),%ecx		/* size */
 
 #if defined __CET__ && (__CET__ & 1) != 0
 	pushl	%ebx			/* scratch for the endbr32 adjustment; pushed after the args are loaded */
 	CFI_PUSH(%ebx)
 #endif
 
 	movl	%ecx,%eax
 	shrl	$3,%ecx 		/* compute count for unrolled loop */
 	negl	%eax
 	andl	$7,%eax 		/* get index where to start loop */
 	jz	Loop			/* necessary special case for 0 */
 	incl	%ecx			/* adjust loop count */
 	shll	$2,%eax 		/* adjustment for pointers... */
 	subl	%eax,%edi		/* ... since they are offset ... */
 	subl	%eax,%esi		/* ... by a constant when we ... */
 	subl	%eax,%edx		/* ... enter the loop */
 	shrl	$2,%eax 		/* restore previous value */
 #if defined __CET__ && (__CET__ & 1) != 0
 	leal	-4(,%eax,4),%ebx	/* Count for 4-byte endbr32 */
 #endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
 	call	L0			/* pushes the address of L0 */
 	CFI_ADJUST_CFA_OFFSET(4)
 L0:	leal	(%eax,%eax,8),%eax	/* index * 9 -- NOTE(review): assumes the byte size of one unrolled step */
 	addl	(%esp),%eax		/* + run-time address of L0 */
 	addl	$(Loop-L0-3),%eax	/* + distance from L0 to Loop, less 3 */
 	addl	$4,%esp			/* drop the address pushed by the call */
 	CFI_ADJUST_CFA_OFFSET(-4)
 #else
 /* Calculate start address in loop for non-PIC.  */
 	leal	(Loop - 3)(%eax,%eax,8),%eax
 #endif
 #if defined __CET__ && (__CET__ & 1) != 0
 	addl	%ebx,%eax		/* Adjust for endbr32 */
 #endif
 	jmp	*%eax			/* jump into loop */
 	ALIGN (3)
 Loop:	movl	(%esi),%eax		/* one step: load s1 limb, ... */
 	sbbl	(%edx),%eax		/* ... subtract s2 limb and incoming borrow (CF), ... */
 	movl	%eax,(%edi)		/* ... store the result limb */
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	4(%esi),%eax
 	sbbl	4(%edx),%eax
 	movl	%eax,4(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	8(%esi),%eax
 	sbbl	8(%edx),%eax
 	movl	%eax,8(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	12(%esi),%eax
 	sbbl	12(%edx),%eax
 	movl	%eax,12(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	16(%esi),%eax
 	sbbl	16(%edx),%eax
 	movl	%eax,16(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	20(%esi),%eax
 	sbbl	20(%edx),%eax
 	movl	%eax,20(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	24(%esi),%eax
 	sbbl	24(%edx),%eax
 	movl	%eax,24(%edi)
 #ifdef _CET_ENDBR
 	_CET_ENDBR
 #endif
 	movl	28(%esi),%eax
 	sbbl	28(%edx),%eax
 	movl	%eax,28(%edi)
 	leal	32(%edi),%edi		/* advance all three pointers by 8 limbs; ... */
 	leal	32(%esi),%esi		/* ... leal and decl leave CF untouched, ... */
 	leal	32(%edx),%edx		/* ... so the borrow survives into the next pass */
 	decl	%ecx
 	jnz	Loop
 
 	sbbl	%eax,%eax		/* %eax = 0 or -1 according to the final borrow */
 	negl	%eax			/* turn that into 0 or 1 */
 
 #if defined __CET__ && (__CET__ & 1) != 0
 	popl	%ebx
 	CFI_POP(%ebx)
 #endif
 
 	popl %esi
 	CFI_POP(%esi)
 	popl %edi
 	CFI_POP(%edi)
 	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/syntax.h b/mpi/i386/syntax.h
index af4d9e80..8dbdc52c 100644
--- a/mpi/i386/syntax.h
+++ b/mpi/i386/syntax.h
@@ -1,98 +1,98 @@
 /* syntax.h -- Definitions for x86 syntax variations.
  *
  *       Copyright (C) 1992, 1994, 1995, 1998, 
  *                     2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 
 #ifdef __i386__
 #ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
 # define CFI_STARTPROC()            .cfi_startproc
 # define CFI_ENDPROC()              .cfi_endproc
 # define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
 # define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
 # define CFI_RESTORE(reg)           .cfi_restore reg
 
 # define CFI_PUSH(reg) \
 	CFI_ADJUST_CFA_OFFSET(4); CFI_REL_OFFSET(reg, 0)
 # define CFI_POP(reg) \
 	CFI_ADJUST_CFA_OFFSET(-4); CFI_RESTORE(reg)
 #else
 # define CFI_STARTPROC()
 # define CFI_ENDPROC()
 # define CFI_ADJUST_CFA_OFFSET(off)
 # define CFI_REL_OFFSET(reg,off)
 # define CFI_RESTORE(reg)
 
 # define CFI_PUSH(reg)
 # define CFI_POP(reg)
 #endif
 #endif
 
 #undef ALIGN
 
 #if defined (BSD_SYNTAX) || defined (ELF_SYNTAX)
 #define R(r) %r
 #define MEM(base)(base)
 #define MEM_DISP(base,displacement)displacement(R(base))
 #define MEM_INDEX(base,index,size)(R(base),R(index),size)
 #ifdef __STDC__
 #define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst
 #define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst
 #else
 #define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst
 #define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst
 #endif
 #define TEXT .text
 #if defined (BSD_SYNTAX)
 #define ALIGN(log) .align log
 #endif
 #if defined (ELF_SYNTAX)
 #define ALIGN(log) .align 1<<(log)
 #endif
 #define GLOBL .globl
 #endif
 
 #ifdef INTEL_SYNTAX
 #define R(r) r
 #define MEM(base)[base]
 #define MEM_DISP(base,displacement)[base+(displacement)]
 #define MEM_INDEX(base,index,size)[base+index*size]
 #define INSN1(mnemonic,size_suffix,dst)mnemonic dst
 #define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src
 #define TEXT .text
 #define ALIGN(log) .align log
 #define GLOBL .globl
 #endif
 
 #ifdef X86_BROKEN_ALIGN
 #undef ALIGN
 #define ALIGN(log) .align log,0x90
 #endif
 
 /* 'ret' instruction replacement for straight-line speculation mitigation */
 #define ret_spec_stop \
 	ret; int3;
diff --git a/mpi/longlong.h b/mpi/longlong.h
index fb860cb6..a0e19279 100644
--- a/mpi/longlong.h
+++ b/mpi/longlong.h
@@ -1,1810 +1,1810 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
    Note: This is the Libgcrypt version
 
 
 Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
               2000, 2001, 2002, 2003, 2004, 2011 Free Software Foundation, Inc.
 
 This file is free software; you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation; either version 2.1 of the License, or (at your
 option) any later version.
 
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License for more details.
 
 You should have received a copy of the GNU Library General Public License
-along with this file; see the file COPYING.LIB.  If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-MA 02111-1307, USA. */
+along with this file; see the file COPYING.LIB.  If not, see <https://www.gnu.org/licenses/>.
+SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* On 32-bit, use 64-bit 'unsigned long long' for UDWtype, if available. */
 #if !defined (UDWtype) && SIZEOF_UNSIGNED_LONG_LONG * 8 == W_TYPE_SIZE * 2
 #  define UDWtype unsigned long long
 #endif
 
 /* On 64-bit, use 128-bit 'unsigned __int128' for UDWtype, if available. */
 #if !defined (UDWtype) && SIZEOF_UNSIGNED___INT128 * 8 == W_TYPE_SIZE * 2
 #  define UDWtype unsigned __int128
 #endif
 
 /* You have to define the following before including this file:
 
    UWtype -- An unsigned type, default type for operations (typically a "word").
    UHWtype -- An unsigned type, at least half the size of UWtype.
    UDWtype -- An unsigned type, at least twice as large as UWtype
               (defaulted above when it is unset and a matching type exists).
    W_TYPE_SIZE -- size in bits of UWtype.
    SItype, USItype -- Signed and unsigned 32 bit types.
    DItype, UDItype -- Signed and unsigned 64 bit types.
 
    On a 32 bit machine UWtype should typically be USItype.
    On a 64 bit machine, UWtype should typically be UDItype.
 */
 /* Half-word helpers: __ll_B is 2^(W_TYPE_SIZE/2); __ll_lowpart and __ll_highpart extract the low and high half of a UWtype.  */
 #define __BITS4 (W_TYPE_SIZE / 4)
 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
 
 /* This is used to make sure no undesirable sharing between different libraries
    that use this file takes place.  */
 #ifndef __MPN
 # define __MPN(x) __##x
 #endif
 
 /* Define auxiliary asm macros.
 
    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
    word product in HIGH_PROD and LOW_PROD.
 
    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
    UDWtype product.  This is just a variant of umul_ppmm.
 
    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
    denominator) divides a UDWtype, composed by the UWtype integers
    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
    in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less
    than DENOMINATOR for correct operation.  If, in addition, the most
    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
    UDIV_NEEDS_NORMALIZATION is defined to 1.
 
    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
    is rounded towards 0.
 
    5) count_leading_zeros(count, x) counts the number of zero-bits from the
    msb to the first non-zero bit in the UWtype X.  This is the number of
    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
 
    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
    from the least significant end.
 
    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
    by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
    (i.e. carry out) is not stored anywhere, and is lost.
 
    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
    and is lost.
 
    If any of these macros are left undefined for a particular CPU,
    C macros are used.  */
 
 /* The CPUs come in alphabetical order below.
 
    Please add support for more CPUs here, or improve the current support
    for the CPUs below!	*/
 
 #ifdef __riscos__
 #pragma continue_after_hash_error
 #else /* !__riscos__ */
 #if defined (__GNUC__) && !defined (NO_ASM)
 
 /* We sometimes need to clobber "cc" with gcc2, but that would not be
    understood by gcc1.	Use cpp to avoid major code duplication.  */
 #if __GNUC__ < 2
 # define __CLOBBER_CC
 # define __AND_CLOBBER_CC
 #else /* __GNUC__ >= 2 */
 # define __CLOBBER_CC : "cc"
 # define __AND_CLOBBER_CC , "cc"
 #endif /* __GNUC__ < 2 */
 
 /***************************************
  ****  Begin CPU Specific Versions  ****
  ***************************************/
 
 /***************************************
  **************  A29K  *****************
  ***************************************/
 #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %1,%4,%5\n"   \
            "addc %0,%2,%3"                                              \
 	   : "=r" ((USItype)(sh)),                                      \
 	    "=&r" ((USItype)(sl))                                       \
 	   : "%r" ((USItype)(ah)),                                      \
 	     "rI" ((USItype)(bh)),                                      \
 	     "%r" ((USItype)(al)),                                      \
 	     "rI" ((USItype)(bl)))
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub %1,%4,%5\n"                                             \
 	   "subc %0,%2,%3"                                              \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "r" ((USItype)(ah)),                                       \
 	     "rI" ((USItype)(bh)),                                      \
 	     "r" ((USItype)(al)),                                       \
 	     "rI" ((USItype)(bl)))
 # define umul_ppmm(xh, xl, m0, m1) \
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("multiplu %0,%1,%2"                                        \
 	     : "=r" ((USItype)(xl))                                     \
 	     : "r" (__m0),                                              \
 	       "r" (__m1));                                             \
     __asm__ ("multmu %0,%1,%2"                                          \
 	     : "=r" ((USItype)(xh))                                     \
 	     : "r" (__m0),                                              \
 	       "r" (__m1));                                             \
   } while (0)
 # define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("dividu %0,%3,%4"                                            \
 	   : "=r" ((USItype)(q)),                                       \
 	     "=q" ((USItype)(r))                                        \
 	   : "1" ((USItype)(n1)),                                       \
 	     "r" ((USItype)(n0)),                                       \
 	     "r" ((USItype)(d)))
 # define count_leading_zeros(count, x) \
     __asm__ ("clz %0,%1"                                                \
 	     : "=r" ((USItype)(count))                                  \
 	     : "r" ((USItype)(x)))
 # define COUNT_LEADING_ZEROS_0 32
 #endif /* __a29k__ */
 
 
 #if defined (__alpha) && W_TYPE_SIZE == 64 /* 64-bit Alpha versions */
 # define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     UDItype __m0 = (m0), __m1 = (m1);	/* evaluate each argument once */	\
     __asm__ ("umulh %r1,%2,%0"         /* result goes to ph */           \
 	     : "=r" ((UDItype)(ph))                                     \
 	     : "%rJ" (__m0),                                            \
 	       "rI" (__m1));                                            \
     (pl) = __m0 * __m1; 		/* low word: plain C multiply */	\
   } while (0)
 # define UMUL_TIME 46
 # ifndef LONGLONG_STANDALONE
 #  define udiv_qrnnd(q, r, n1, n0, d) \
   do { UDItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); /* returns the quotient, stores the remainder in __r */ \
     (r) = __r;								\
   } while (0)
 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); /* full prototype: a bare () means (void) in C23 and would reject the call above */
 #  define UDIV_TIME 220
 # endif /* !LONGLONG_STANDALONE */
 #endif /* __alpha */
 
 /***************************************
  **************  ARM  ******************
  ***************************************/
 #if defined (__arm__) && W_TYPE_SIZE == 32 && \
     (!defined (__thumb__) || defined (__thumb2__))
 /* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
 # ifndef __ARM_ARCH
 #  ifdef __ARM_ARCH_2__
 #   define __ARM_ARCH 2
 #  elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
 #   define __ARM_ARCH 3
 #  elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__)
 #   define __ARM_ARCH 4
 #  elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \
         || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
         || defined(__ARM_ARCH_5TEJ__)
 #   define __ARM_ARCH 5
 #  elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
         || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
         || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
 #   define __ARM_ARCH 6
 #  elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
         || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
         || defined(__ARM_ARCH_7EM__)
 #   define __ARM_ARCH 7
 #  else
    /* could not detect? */
 #  endif
 # endif /* !__ARM_ARCH */
 
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds %1, %4, %5\n"                                          \
 	   "adc  %0, %2, %3"                                            \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "%r" ((USItype)(ah)),                                      \
 	     "rI" ((USItype)(bh)),                                      \
 	     "%r" ((USItype)(al)),                                      \
 	     "rI" ((USItype)(bl)) __CLOBBER_CC)
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subs %1, %4, %5\n"                                          \
 	   "sbc  %0, %2, %3"                                            \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "r" ((USItype)(ah)),                                       \
 	     "rI" ((USItype)(bh)),                                      \
 	     "r" ((USItype)(al)),                                       \
 	     "rI" ((USItype)(bl)) __CLOBBER_CC)
 /* umul_ppmm(xh, xl, a, b): 32x32 -> 64 bit unsigned multiply; the high
  * word of a * b goes to xh and the low word to xl.
  *
  * __ARM_ARCH <= 3: the product is assembled from four 16x16 "mul"
  * partial products (see the AAAA/aaaa/BBBB/bbbb annotations in the
  * template).  r0, r1 and r2 are used as scratch and therefore listed as
  * clobbers; xh is early-clobber because it is written while a and b
  * are still needed.
  *
  * Otherwise: a single "umull" writes the low word to %1 (xl) and the
  * high word to %0 (xh).  */
 # if (defined __ARM_ARCH && __ARM_ARCH <= 3)
 #  define umul_ppmm(xh, xl, a, b) \
   __asm__ ("@ Inlined umul_ppmm\n"                                      \
 	"mov	%|r0, %2, lsr #16		@ AAAA\n"               \
 	"mov	%|r2, %3, lsr #16		@ BBBB\n"               \
 	"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n"               \
 	"bic	%0, %3, %|r2, lsl #16		@ bbbb\n"               \
 	"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n"        \
 	"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n"        \
 	"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n"        \
 	"mul	%0, %|r0, %0			@ AAAA * bbbb\n"        \
 	"adds	%|r0, %1, %0			@ central sum\n"        \
 	"addcs	%|r2, %|r2, #65536\n"                                   \
 	"adds	%1, %|r1, %|r0, lsl #16\n"                              \
 	"adc	%0, %|r2, %|r0, lsr #16"                                \
 	   : "=&r" ((xh)),                                              \
 	     "=r" ((xl))                                                \
 	   : "r" ((USItype)(a)),                                        \
 	     "r" ((USItype)(b))                                         \
 	   : "r0", "r1", "r2" __AND_CLOBBER_CC)
 # else /* __ARM_ARCH >= 4 */
 #  define umul_ppmm(xh, xl, a, b)                                         \
   __asm__ ("@ Inlined umul_ppmm\n"                                      \
 	   "umull %1, %0, %2, %3"                                       \
 		   : "=&r" ((xh)),                                      \
 		     "=r" ((xl))                                        \
 		   : "r" ((USItype)(a)),                                \
 		     "r" ((USItype)(b)))
 # endif /* __ARM_ARCH >= 4 */
 /* Relative cost estimates for multiply and divide; their consumers are
  * not visible in this part of the file.  */
 # define UMUL_TIME 20
 # define UDIV_TIME 100
 /* count_leading_zeros(count, x): store the number of leading zero bits
  * of the 32-bit value x into count using the "clz" instruction.  Only
  * defined when __ARM_ARCH is known to be at least 5; otherwise no
  * ARM-specific definition is provided here.  */
 # if (defined __ARM_ARCH && __ARM_ARCH >= 5)
 #  define count_leading_zeros(count, x) \
   __asm__ ("clz %0, %1"                                                 \
 		   : "=r" ((count))                                     \
 		   : "r" ((USItype)(x)))
 # endif /* __ARM_ARCH >= 5 */
 #endif /* __arm__ */
 
 /***************************************
  **********  ARM64 / Aarch64  **********
  ***************************************/
 #if defined(__aarch64__) && W_TYPE_SIZE == 64
 /* add_ssaaaa: two-word (2 x 64 bit) addition.  "adds" adds the low
  * words into sl and sets carry; "adc" adds the high words plus carry
  * into sh.  sl is early-clobber because it is written before the
  * high-word inputs are read.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds %1, %4, %5\n"                                          \
            "adc  %0, %2, %3\n"                                          \
            : "=r" ((sh)),                                               \
              "=&r" ((sl))                                               \
            : "r" ((UDItype)(ah)),                                       \
              "r" ((UDItype)(bh)),                                       \
              "r" ((UDItype)(al)),                                       \
              "r" ((UDItype)(bl)) __CLOBBER_CC)
 /* sub_ddmmss: two-word subtraction; "subs" handles the low words and
  * "sbc" the high words including the borrow.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subs %1, %4, %5\n"                                          \
            "sbc  %0, %2, %3\n"                                          \
            : "=r" ((sh)),                                               \
              "=&r" ((sl))                                               \
            : "r" ((UDItype)(ah)),                                       \
              "r" ((UDItype)(bh)),                                       \
              "r" ((UDItype)(al)),                                       \
              "r" ((UDItype)(bl)) __CLOBBER_CC)
 /* umul_ppmm: 64x64 -> 128 bit unsigned multiply.  The low word is
  * computed with a plain C multiplication; the high word comes from
  * "umulh".  The arguments are copied into locals first so that each
  * macro argument is evaluated exactly once.  */
 # define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                  \
     UDItype __m0 = (m0), __m1 = (m1), __ph;                             \
     (pl) = __m0 * __m1;                                                 \
     __asm__ ("umulh %0,%1,%2"                                           \
              : "=r" (__ph)                                              \
              : "r" (__m0),                                              \
                "r" (__m1));                                             \
     (ph) = __ph; \
   } while (0)
 /* count_leading_zeros: "clz" on the 64-bit value x.  The result goes
  * through a UDItype temporary so that count may be of any integer
  * type.  */
 # define count_leading_zeros(count, x) \
   do {                                                                  \
     UDItype __co;                                                       \
     __asm__ ("clz %0, %1\n"                                             \
              : "=r" (__co)                                              \
              : "r" ((UDItype)(x)));                                     \
     (count) = __co;                                                     \
   } while (0)
 #endif /* __aarch64__ */
 
 /***************************************
  **************  CLIPPER  **************
  ***************************************/
 #if defined (__clipper__) && W_TYPE_SIZE == 32
 /* umul_ppmm(w1, w0, u, v): multiply u by v with "mulwux" into a
  * double-word result held in a union, then split it: the __h half goes
  * to w1 and the __l half to w0.  Implemented as a GNU statement
  * expression.  NOTE(review): "mulwux" is presumably the unsigned
  * widening multiply of this CPU - confirm against its manual.  */
 # define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __xx;							\
   __asm__ ("mulwux %2,%0"                                               \
 	   : "=r" (__xx.__ll)                                           \
 	   : "%0" ((USItype)(u)),                                       \
 	     "r" ((USItype)(v)));                                       \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 /* smul_ppmm(w1, w0, u, v): same structure as umul_ppmm but with signed
  * types and the "mulwx" instruction.  */
 # define smul_ppmm(w1, w0, u, v) \
   ({union {DItype __ll; 						\
 	   struct {SItype __l, __h;} __i;				\
 	  } __xx;							\
   __asm__ ("mulwx %2,%0"                                                \
 	   : "=r" (__xx.__ll)                                           \
 	   : "%0" ((SItype)(u)),                                        \
 	     "r" ((SItype)(v)));                                        \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 /* __umulsidi3(u, v): statement expression yielding the whole
  * double-word result of "mulwux" as a UDItype value.  */
 # define __umulsidi3(u, v) \
   ({UDItype __w;							\
     __asm__ ("mulwux %2,%0"                                             \
 	     : "=r" (__w)                                               \
 	     : "%0" ((USItype)(u)),                                     \
 	       "r" ((USItype)(v)));                                     \
     __w; })
 #endif /* __clipper__ */
 
 
 /***************************************
  **************  GMICRO  ***************
  ***************************************/
 #if defined (__gmicro__) && W_TYPE_SIZE == 32
 /* All macros in this section write their results through casts of the
  * form (USItype)(lvalue) in the output operand list, so the arguments
  * must be assignable objects.  */
 /* add_ssaaaa: two-word addition.  ah/al are tied to the outputs
  * ("%0"/"%1"), so the instructions add bl into sl ("add.w") and then
  * bh into sh ("addx").  NOTE(review): "addx" presumably adds with
  * carry - confirm.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add.w %5,%1\n"                                              \
 	   "addx %3,%0"                                                 \
 	   : "=g" ((USItype)(sh)),                                      \
 	     "=&g" ((USItype)(sl))                                      \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "g" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "g" ((USItype)(bl)))
 /* sub_ddmmss: two-word subtraction with the same operand layout;
  * "sub.w" handles the low word and "subx" the high word.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub.w %5,%1\n"                                              \
 	   "subx %3,%0"                                                 \
 	   : "=g" ((USItype)(sh)),                                      \
 	     "=&g" ((USItype)(sl))                                      \
 	   : "0" ((USItype)(ah)),                                       \
 	     "g" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "g" ((USItype)(bl)))
 /* umul_ppmm(ph, pl, m0, m1): "mulx" with m0 tied to the first output;
  * the two outputs receive the high (ph) and low (pl) product words.  */
 # define umul_ppmm(ph, pl, m0, m1) \
   __asm__ ("mulx %3,%0,%1"                                              \
 	   : "=g" ((USItype)(ph)),                                      \
 	     "=r" ((USItype)(pl))                                       \
 	   : "%0" ((USItype)(m0)),                                      \
 	     "g" ((USItype)(m1)))
 /* udiv_qrnnd(q, r, nh, nl, d): "divx" of the two-word value nh:nl by d.
  * nl enters in the register that returns q and nh in the one that
  * returns r.  */
 # define udiv_qrnnd(q, r, nh, nl, d) \
   __asm__ ("divx %4,%0,%1"                                              \
 	   : "=g" ((USItype)(q)),                                       \
 	     "=r" ((USItype)(r))                                        \
 	   : "1" ((USItype)(nh)),                                       \
 	     "0" ((USItype)(nl)),                                       \
 	     "g" ((USItype)(d)))
 /* count_leading_zeros(count, x): "bsch/1" bit search on x; count is
  * pre-loaded with 0 through the matching "0" input constraint.  */
 # define count_leading_zeros(count, x) \
   __asm__ ("bsch/1 %1,%0"                                               \
 	   : "=g" (count)                                               \
 	   : "g" ((USItype)(x)),                                        \
 	     "0" ((USItype)0))
 #endif /* __gmicro__ */
 
 
 /***************************************
  **************  HPPA  *****************
  ***************************************/
 #if defined (__hppa) && W_TYPE_SIZE == 32
 /* add_ssaaaa: two-word addition.  "add" adds the low words into sl and
  * "addc" adds the high words plus carry into sh.  sl is early-clobber
  * because it is written before the high-word inputs are read.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("	add %4,%5,%1\n"                                             \
  	   "	addc %2,%3,%0"                                              \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%rM" ((USItype)(ah)),                                     \
 	     "rM" ((USItype)(bh)),                                      \
 	     "%rM" ((USItype)(al)),                                     \
 	     "rM" ((USItype)(bl)))
 /* sub_ddmmss: two-word subtraction; "sub" for the low words, "subb"
  * (subtract with borrow) for the high words.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("	sub %4,%5,%1\n"                                             \
 	   "	subb %2,%3,%0"                                              \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "rM" ((USItype)(ah)),                                      \
 	     "rM" ((USItype)(bh)),                                      \
 	     "rM" ((USItype)(al)),                                      \
 	     "rM" ((USItype)(bl)))
 # if defined (_PA_RISC1_1)
 /* umul_ppmm(wh, wl, u, v): 32x32 -> 64 bit multiply using "xmpyu" on
  * floating-point registers ("f" constraints).  The double-word result
  * is split through a union: __h goes to wh, __l to wl.  Only available
  * when _PA_RISC1_1 is defined.  */
 #  define umul_ppmm(wh, wl, u, v) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
     __asm__ ("	xmpyu %1,%2,%0"                                           \
 	     : "=*f" (__xx.__ll)                                        \
 	     : "*f" ((USItype)(u)),                                     \
 	       "*f" ((USItype)(v)));                                    \
     (wh) = __xx.__i.__h;						\
     (wl) = __xx.__i.__l;						\
   } while (0)
 #  define UMUL_TIME 8
 #  define UDIV_TIME 60
 # else
 #  define UMUL_TIME 40
 #  define UDIV_TIME 80
 # endif
 # if !defined(LONGLONG_STANDALONE) && !defined(ASM_DISABLED)
 /* udiv_qrnnd(q, r, n1, n0, d): divide the two-word value n1:n0 by d
  * via the out-of-line helper __udiv_qrnnd, which returns the quotient
  * and stores the remainder through its first argument.  */
 #  define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); 			\
     (r) = __r;								\
   } while (0)
 /* Declared with a full prototype: an empty parameter list means
  * "no arguments" as of C23, which would make the call above invalid,
  * and without a prototype the arguments would not be converted to
  * USItype.  */
 extern USItype __udiv_qrnnd (USItype *, USItype, USItype, USItype);
 # endif /* !LONGLONG_STANDALONE && !ASM_DISABLED */
 /* count_leading_zeros(count, x): binary search over the bit ranges
  * 31..16, 15..8, 7..4 and 3..2 as annotated in the template.  count
  * starts at 1 and 16/8/4/2 is added whenever the inspected range is
  * zero; otherwise the working copy is shifted down.  The final
  * extracted bit is subtracted at the end.  __tmp receives the working
  * copy of x (tied via the "1" constraint) so x itself is not modified.  */
 # define count_leading_zeros(count, x) \
   do {								       \
     USItype __tmp;						       \
     __asm__ (				                               \
        "	ldi             1,%0                                       \n" \
        "	extru,= 	%1,15,16,%%r0  ; Bits 31..16 zero?         \n" \
        "	extru,tr	%1,15,16,%1    ; No.  Shift down, skip add.\n" \
        "	ldo		16(%0),%0      ; Yes.	Perform add.       \n" \
        "	extru,= 	%1,23,8,%%r0   ; Bits 15..8 zero?          \n" \
        "	extru,tr	%1,23,8,%1     ; No.  Shift down, skip add.\n" \
        "	ldo		8(%0),%0       ; Yes.	Perform add.       \n" \
        "	extru,= 	%1,27,4,%%r0   ; Bits 7..4 zero?           \n" \
        "	extru,tr	%1,27,4,%1     ; No.  Shift down, skip add.\n" \
        "	ldo		4(%0),%0       ; Yes.	Perform add.       \n" \
        "	extru,= 	%1,29,2,%%r0   ; Bits 3..2 zero?           \n" \
        "	extru,tr	%1,29,2,%1     ; No.  Shift down, skip add.\n" \
        "	ldo		2(%0),%0       ; Yes.	Perform add.       \n" \
        "	extru		%1,30,1,%1     ; Extract bit 1.            \n" \
        "	sub		%0,%1,%0       ; Subtract it.              "   \
        : "=r" (count), "=r" (__tmp) : "1" (x));                        \
   } while (0)
 #endif /* hppa */
 
 
 /***************************************
  **************  I370  *****************
  ***************************************/
 #if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
 /* umul_ppmm(xh, xl, m0, m1): unsigned 32x32 -> 64 bit multiply built on
  * the "mr" instruction.  After the multiply the high word is corrected
  * by adding m1 when m0 has its top bit set and m0 when m1 has its top
  * bit set (the "(SItype) x >> 31" masks), which converts a signed
  * product into the unsigned one.  NOTE(review): this relies on ">>" of
  * a negative SItype being an arithmetic shift.  */
 # define umul_ppmm(xh, xl, m0, m1) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("mr %0,%3"                                                 \
 	     : "=r" (__xx.__i.__h),                                     \
 	       "=r" (__xx.__i.__l)                                      \
 	     : "%1" (__m0),                                             \
 	       "r" (__m1));                                             \
     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
     (xh) += ((((SItype) __m0 >> 31) & __m1)				\
 	     + (((SItype) __m1 >> 31) & __m0)); 			\
   } while (0)
 /* smul_ppmm(xh, xl, m0, m1): the same "mr" multiply without the
  * unsigned correction step.  m0 and m1 are used directly as asm
  * operands (no local copies).  */
 # define smul_ppmm(xh, xl, m0, m1) \
   do {									\
     union {DItype __ll; 						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
     __asm__ ("mr %0,%3"                                                 \
 	     : "=r" (__xx.__i.__h),                                     \
 	       "=r" (__xx.__i.__l)                                      \
 	     : "%1" (m0),                                               \
 	       "r" (m1));                                               \
     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
   } while (0)
 /* sdiv_qrnnd(q, r, n1, n0, d): load n1:n0 into a double-word union,
  * divide it in place by d with "dr", then take the quotient from the
  * __l half and the remainder from the __h half.  */
 # define sdiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     union {DItype __ll; 						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
     __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
     __asm__ ("dr %0,%2"                                                 \
 	     : "=r" (__xx.__ll)                                         \
 	     : "0" (__xx.__ll), "r" (d));                               \
     (q) = __xx.__i.__l; (r) = __xx.__i.__h;				\
   } while (0)
 #endif /* __i370__ || __mvs__ */
 
 
 /***************************************
  **************  I386  *****************
  ***************************************/
 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
 /* add_ssaaaa: two-word addition.  ah and al are tied to the output
  * registers ("%0"/"%1"), so "addl" adds bl into sl and "adcl" adds bh
  * plus carry into sh.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addl %5,%1\n"                                               \
 	   "adcl %3,%0"                                                 \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "g" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "g" ((USItype)(bl))                                        \
 	   __CLOBBER_CC)
 /* sub_ddmmss: two-word subtraction; "subl" on the low words, "sbbl"
  * (subtract with borrow) on the high words.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subl %5,%1\n"                                               \
 	   "sbbl %3,%0"                                                 \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "0" ((USItype)(ah)),                                       \
 	     "g" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "g" ((USItype)(bl))                                        \
 	   __CLOBBER_CC)
 /* umul_ppmm(w1, w0, u, v): "mull" multiplies the "a" register (loaded
  * with u) by v; the low word is returned in "a" (w0) and the high word
  * in "d" (w1).  */
 # define umul_ppmm(w1, w0, u, v) \
   __asm__ ("mull %3"                                                    \
 	   : "=a" ((w0)),                                               \
 	     "=d" ((w1))                                                \
 	   : "%0" ((USItype)(u)),                                       \
 	     "rm" ((USItype)(v))                                        \
 	   __CLOBBER_CC)
 /* udiv_qrnnd(q, r, n1, n0, d): "divl" divides the two-word value held
  * in "d":"a" (n1:n0) by d; the quotient comes back in "a" (q) and the
  * remainder in "d" (r).  */
 # define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divl %4"                                                    \
 	   : "=a" ((q)),                                                \
 	     "=d" ((r))                                                 \
 	   : "0" ((USItype)(n0)),                                       \
 	     "1" ((USItype)(n1)),                                       \
 	     "rm" ((USItype)(d))                                        \
 	   __CLOBBER_CC)
 /* count_leading_zeros(count, x): "bsrl" yields the index of the
  * highest set bit; XOR with 31 turns that index into the number of
  * leading zeros.  NOTE(review): no special handling for x == 0 is
  * present here - callers presumably guarantee x != 0.  */
 # define count_leading_zeros(count, x) \
   do {									\
     USItype __cbtmp;							\
     __asm__ ("bsrl %1,%0"                                               \
 	     : "=r" (__cbtmp) : "rm" ((USItype)(x))                     \
 	     __CLOBBER_CC);						\
     (count) = __cbtmp ^ 31;						\
   } while (0)
 /* count_trailing_zeros(count, x): bit-scan-forward on x, written with
  * a "rep" prefix.  NOTE(review): the prefix presumably makes CPUs with
  * the tzcnt instruction decode this as tzcnt - confirm.  */
 # define count_trailing_zeros(count, x) \
   __asm__ ("rep;bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)) __CLOBBER_CC)
 /* Cost estimates; only set when not already defined earlier.  */
 # ifndef UMUL_TIME
 #  define UMUL_TIME 40
 # endif
 # ifndef UDIV_TIME
 #  define UDIV_TIME 40
 # endif
 #endif /* 80x86 */
 
 /***************************************
  *********** AMD64 / x86-64 ************
  ***************************************/
 #if defined(__x86_64) && W_TYPE_SIZE == 64
 /* add_ssaaaa: two-word (2 x 64 bit) addition.  ah and al are tied to
  * the output registers, so "addq" adds bl into sl and "adcq" adds bh
  * plus carry into sh.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addq %5,%1\n"                                               \
 	   "adcq %3,%0"                                                 \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "0" ((UDItype)(ah)),                                       \
 	     "g"  ((UDItype)(bh)),                                      \
 	     "1" ((UDItype)(al)),                                       \
 	     "g"  ((UDItype)(bl))                                       \
 	   __CLOBBER_CC)
 /* sub_ddmmss: two-word subtraction; "subq" on the low words, "sbbq"
  * (subtract with borrow) on the high words.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subq %5,%1\n"                                               \
 	   "sbbq %3,%0"                                                 \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "0" ((UDItype)(ah)),                                       \
 	     "g" ((UDItype)(bh)),                                       \
 	     "1" ((UDItype)(al)),                                       \
 	     "g" ((UDItype)(bl))                                        \
 	   __CLOBBER_CC)
 /* umul_ppmm(w1, w0, u, v): "mulq" multiplies the "a" register (loaded
  * with u) by v; low word returned in "a" (w0), high word in "d" (w1).  */
 # define umul_ppmm(w1, w0, u, v) \
   __asm__ ("mulq %3"                                                    \
 	   : "=a" ((w0)),                                               \
 	     "=d" ((w1))                                                \
 	   : "0" ((UDItype)(u)),                                        \
 	     "rm" ((UDItype)(v))                                        \
 	   __CLOBBER_CC)
 /* udiv_qrnnd(q, r, n1, n0, d): "divq" divides the two-word value in
  * "d":"a" (n1:n0) by d; quotient in "a" (q), remainder in "d" (r).  */
 # define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divq %4"                                                    \
 	   : "=a" ((q)),                                                \
 	     "=d" ((r))                                                 \
 	   : "0" ((UDItype)(n0)),                                       \
 	     "1" ((UDItype)(n1)),                                       \
 	     "rm" ((UDItype)(d))                                        \
 	   __CLOBBER_CC)
 /* count_leading_zeros(count, x): "bsrq" yields the index of the highest
  * set bit; XOR with 63 converts it into the leading-zero count.
  * NOTE(review): no special handling for x == 0 is present here.  */
 # define count_leading_zeros(count, x) \
   do {                                                                  \
     UDItype __cbtmp;                                                    \
     __asm__ ("bsrq %1,%0"                                               \
              : "=r" (__cbtmp) : "rm" ((UDItype)(x))                     \
              __CLOBBER_CC);                                             \
     (count) = __cbtmp ^ 63;                                             \
   } while (0)
 /* count_trailing_zeros(count, x): bit-scan-forward with a "rep"
  * prefix; the result passes through a UDItype temporary so count may
  * have any integer type.  */
 # define count_trailing_zeros(count, x) \
   do {                                                                  \
     UDItype __cbtmp;                                                    \
     __asm__ ("rep;bsfq %1,%0"                                           \
              : "=r" (__cbtmp) : "rm" ((UDItype)(x))                     \
              __CLOBBER_CC);                                             \
     (count) = __cbtmp;                                                  \
   } while (0)
 /* Cost estimates; only set when not already defined earlier.  */
 # ifndef UMUL_TIME
 #  define UMUL_TIME 40
 # endif
 # ifndef UDIV_TIME
 #  define UDIV_TIME 40
 # endif
 #endif /* __x86_64 */
 
 
 /***************************************
  **************  I860  *****************
  ***************************************/
 #if defined (__i860__) && W_TYPE_SIZE == 32
 /* rshift_rhlc(r, h, l, c): shift the two-word value h:l right by c and
  * store the resulting word in r.  "shr %3,r0,r0" issues a shift with
  * count c whose result is discarded into r0; "shrd" then performs the
  * double-word shift.  NOTE(review): the first instruction presumably
  * only serves to latch the shift count for shrd - confirm against the
  * i860 manual.
  * The ':' that introduces the output operand list is required;
  * without it "=r" is pasted onto the asm template string and the
  * statement does not parse.  */
 # define rshift_rhlc(r,h,l,c) \
   __asm__ ("shr %3,r0,r0\n"  \
            "shrd %1,%2,%0"   \
 	   : "=r" (r) : "r" (h), "r" (l), "rn" (c))
 #endif /* i860 */
 
 /***************************************
  **************  I960  *****************
  ***************************************/
 #if defined (__i960__) && W_TYPE_SIZE == 32
 /* add_ssaaaa: two-word addition.  "cmpo 1,0" is issued first to put
  * the condition code into a known state (presumably carry clear -
  * confirm), then two "addc" instructions add the low and the high
  * words.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("cmpo 1,0\n"      \
            "addc %5,%4,%1\n" \
            "addc %3,%2,%0"   \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%dI" ((USItype)(ah)),                                     \
 	     "dI" ((USItype)(bh)),                                      \
 	     "%dI" ((USItype)(al)),                                     \
 	     "dI" ((USItype)(bl)))
 /* sub_ddmmss: two-word subtraction; "cmpo 0,0" initialises the
  * condition code, then two "subc" instructions subtract the low and
  * the high words.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("cmpo 0,0\n"      \
            "subc %5,%4,%1\n" \
            "subc %3,%2,%0"   \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "dI" ((USItype)(ah)),                                      \
 	     "dI" ((USItype)(bh)),                                      \
 	     "dI" ((USItype)(al)),                                      \
 	     "dI" ((USItype)(bl)))
 /* umul_ppmm(w1, w0, u, v): "emul" produces a double-word product in a
  * union; the __h half goes to w1 and the __l half to w0.  */
 # define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __xx;							\
   __asm__ ("emul        %2,%1,%0"                                       \
 	   : "=d" (__xx.__ll)                                           \
 	   : "%dI" ((USItype)(u)),                                      \
 	     "dI" ((USItype)(v)));                                      \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 /* __umulsidi3(u, v): statement expression yielding the whole
  * double-word "emul" product as a UDItype value.  */
 # define __umulsidi3(u, v) \
   ({UDItype __w;							\
     __asm__ ("emul      %2,%1,%0"                                       \
 	     : "=d" (__w)                                               \
 	     : "%dI" ((USItype)(u)),                                    \
 	       "dI" ((USItype)(v)));                                    \
     __w; })
 /* udiv_qrnnd(q, r, nh, nl, d): divide the two-word value nh:nl by d
  * using "ediv".  The dividend is assembled in __nn; the instruction
  * writes a register pair into __rq from which the remainder is read
  * as the __l half and the quotient as the __h half.
  * The result union __rq has to be declared alongside __nn, and the
  * template must refer to the operands by number: %2 is the divisor d,
  * %1 the double-word dividend and %0 the result pair.
  * NOTE(review): operand order follows "ediv src1(divisor),
  * src2(dividend), dst" - verify on real hardware or a simulator.  */
 # define udiv_qrnnd(q, r, nh, nl, d) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __nn, __rq;							\
     __nn.__i.__h = (nh); __nn.__i.__l = (nl);				\
     __asm__ ("ediv %2,%1,%0"                                            \
 	   : "=d" (__rq.__ll)                                           \
 	   : "dI" (__nn.__ll),                                          \
 	     "dI" ((USItype)(d)));                                      \
     (r) = __rq.__i.__l; (q) = __rq.__i.__h;				\
   } while (0)
 /* count_leading_zeros(count, x): "scanbit" returns the index of the
  * most significant set bit; XOR with 31 converts it into the number of
  * leading zeros.  */
 # define count_leading_zeros(count, x) \
   do {									\
     USItype __cbtmp;							\
     __asm__ ("scanbit %1,%0"                                            \
 	     : "=r" (__cbtmp)                                           \
 	     : "r" ((USItype)(x)));                                     \
     (count) = __cbtmp ^ 31;						\
   } while (0)
 /* Value count_leading_zeros produces for x == 0 on this CPU.  */
 # define COUNT_LEADING_ZEROS_0 (-32) /* sic */
 # if defined (__i960mx)  /* what is the proper symbol to test??? */
 /* rshift_rhlc(r, h, l, c): shift the two-word value h:l right by c
  * with "shre" and store the result in r.  The body is wrapped in
  * do { ... } while (0) so that the macro followed by ';' forms exactly
  * one statement.  */
 #  define rshift_rhlc(r,h,l,c) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __nn;							\
     __nn.__i.__h = (h); __nn.__i.__l = (l);				\
     __asm__ ("shre %2,%1,%0"                                            \
 	     : "=d" (r) : "dI" (__nn.__ll), "dI" (c));                  \
   } while (0)
 # endif /* i960mx */
 #endif /* i960 */
 
 
 /***************************************
  **************  68000	****************
  ***************************************/
 #if (defined (__mc68000__) || defined (__mc68020__)                     \
      || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add%.l %5,%1\n"                                             \
 	   "addx%.l %3,%0"                                              \
 	   : "=d" ((USItype)(sh)),                                      \
 	     "=&d" ((USItype)(sl))                                      \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "d" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "g" ((USItype)(bl)))
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub%.l %5,%1\n"                                             \
 	   "subx%.l %3,%0"                                              \
 	   : "=d" ((USItype)(sh)),                                      \
 	     "=&d" ((USItype)(sl))                                      \
 	   : "0" ((USItype)(ah)),                                       \
 	     "d" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "g" ((USItype)(bl)))
 # if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("mulu%.l %3,%1:%0"                                           \
 	   : "=d" ((USItype)(w0)),                                      \
 	     "=d" ((USItype)(w1))                                       \
 	   : "%0" ((USItype)(u)),                                       \
 	     "dmi" ((USItype)(v)))
 #  define UMUL_TIME 45
 #  define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divu%.l %4,%1:%0"                                           \
 	   : "=d" ((USItype)(q)),                                       \
 	     "=d" ((USItype)(r))                                        \
 	   : "0" ((USItype)(n0)),                                       \
 	     "1" ((USItype)(n1)),                                       \
 	     "dmi" ((USItype)(d)))
 #  define UDIV_TIME 90
 #  define sdiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divs%.l %4,%1:%0"                                           \
 	   : "=d" ((USItype)(q)),                                       \
 	     "=d" ((USItype)(r))                                        \
 	   : "0" ((USItype)(n0)),                                       \
 	     "1" ((USItype)(n1)),                                       \
 	     "dmi" ((USItype)(d)))
 #  define count_leading_zeros(count, x) \
   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 	   : "=d" ((USItype)(count))                                    \
 	   : "od" ((USItype)(x)), "n" (0))
 #  define COUNT_LEADING_ZEROS_0 32
 # else /* not mc68020 */
 #  define umul_ppmm(xh, xl, a, b) \
   do { USItype __umul_tmp1, __umul_tmp2;			  \
 	__asm__ ("| Inlined umul_ppmm                         \n" \
  "        move%.l %5,%3                                       \n" \
  "        move%.l %2,%0                                       \n" \
  "        move%.w %3,%1                                       \n" \
  "        swap	%3                                            \n" \
  "        swap	%0                                            \n" \
  "        mulu	%2,%1                                         \n" \
  "        mulu	%3,%0                                         \n" \
  "        mulu	%2,%3                                         \n" \
  "        swap	%2                                            \n" \
  "        mulu	%5,%2                                         \n" \
  "        add%.l	%3,%2                                 \n" \
  "        jcc	1f                                            \n" \
  "        add%.l	%#0x10000,%0                          \n" \
  "1:	move%.l %2,%3                                         \n" \
  "        clr%.w	%2                                    \n" \
  "        swap	%2                                            \n" \
  "        swap	%3                                            \n" \
  "        clr%.w	%3                                    \n" \
  "        add%.l	%3,%1                                 \n" \
  "        addx%.l %2,%0                                       \n" \
  "        | End inlined umul_ppmm"                                \
 	      : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)),     \
 		"=d" (__umul_tmp1), "=&d" (__umul_tmp2)           \
 	      : "%2" ((USItype)(a)), "d" ((USItype)(b)));         \
   } while (0)
 #  define UMUL_TIME 100
 #  define UDIV_TIME 400
 # endif /* not mc68020 */
 #endif /* mc68000 */
 
 
 /***************************************
  **************  88000	****************
  ***************************************/
 #if defined (__m88000__) && W_TYPE_SIZE == 32
 /* add_ssaaaa: two-word addition.  "addu.co" adds the low words and
  * produces a carry-out, "addu.ci" adds the high words with carry-in.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addu.co %1,%r4,%r5\n"                                       \
 	   "addu.ci %0,%r2,%r3"                                         \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%rJ" ((USItype)(ah)),                                     \
 	     "rJ" ((USItype)(bh)),                                      \
 	     "%rJ" ((USItype)(al)),                                     \
 	     "rJ" ((USItype)(bl)))
 /* sub_ddmmss: two-word subtraction using "subu.co" / "subu.ci".  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subu.co %1,%r4,%r5\n"                                       \
 	   "subu.ci %0,%r2,%r3"                                         \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "rJ" ((USItype)(ah)),                                      \
 	     "rJ" ((USItype)(bh)),                                      \
 	     "rJ" ((USItype)(al)),                                      \
 	     "rJ" ((USItype)(bl)))
 /* count_leading_zeros(count, x): "ff1" finds the index of the first
  * (most significant) set bit; XOR with 31 converts that index into the
  * number of leading zeros.  */
 # define count_leading_zeros(count, x) \
   do {									\
     USItype __cbtmp;							\
     __asm__ ("ff1 %0,%1"                                                \
 	     : "=r" (__cbtmp)                                           \
 	     : "r" ((USItype)(x)));                                     \
     (count) = __cbtmp ^ 31;						\
   } while (0)
 /* Value count_leading_zeros produces for x == 0 on this CPU.  */
 # define COUNT_LEADING_ZEROS_0 63 /* sic */
 # if defined (__m88110__)
 /* umul_ppmm(wh, wl, u, v): "mulu.d" produces a double-word product in
  * a union; __h goes to wh, __l to wl.  */
 #  define umul_ppmm(wh, wl, u, v) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __x;							\
     __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v));   \
     (wh) = __x.__i.__h; 						\
     (wl) = __x.__i.__l; 						\
   } while (0)
 /* udiv_qrnnd(q, r, n1, n0, d): "divu.d" divides the two-word value
  * n1:n0 by d and leaves a double-word quotient in __q.  The low word
  * of that quotient is q; the remainder is reconstructed as
  * n0 - q * d in single-word arithmetic.
  * The low quotient word has to be read as __q.__i.__l: the union only
  * has the members __ll and __i, so "__q.__l" does not exist.  */
 #  define udiv_qrnnd(q, r, n1, n0, d) \
   ({union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __x, __q;							\
   __x.__i.__h = (n1); __x.__i.__l = (n0);				\
   __asm__ ("divu.d %0,%1,%2"                                            \
 	   : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));                \
   (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
 #  define UMUL_TIME 5
 #  define UDIV_TIME 25
 # else
 #  define UMUL_TIME 17
 #  define UDIV_TIME 150
 # endif /* __m88110__ */
 #endif /* __m88000__ */
 
 /***************************************
  **************  MIPS  *****************
  ***************************************/
 #if defined (__mips__) && W_TYPE_SIZE == 32
 /* umul_ppmm(w1, w0, u, v): 32x32 -> 64 bit unsigned multiply; w1 gets
  * the high word and w0 the low word.  Three variants, selected by
  * compiler:
  *  - clang, or GCC >= 4.4: plain C using a UDItype product, leaving
  *    instruction selection to the compiler;
  *  - other GCC newer than 2.6: "multu" with the result read through
  *    the "l" (LO) and "h" (HI) register constraints;
  *  - anything else: "multu" followed by explicit "mflo"/"mfhi".  */
 # if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \
                                                __GNUC_MINOR__ >= 4)
 #  define umul_ppmm(w1, w0, u, v) \
   do {                                                                  \
     UDItype __r;                                                        \
     __r = (UDItype)(u) * (v);                                           \
     (w1) = __r >> 32;                                                   \
     (w0) = (USItype) __r;                                               \
   } while (0)
 # elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("multu %2,%3"                                                \
 	   : "=l" ((USItype)(w0)),                                      \
 	     "=h" ((USItype)(w1))                                       \
 	   : "d" ((USItype)(u)),                                        \
 	     "d" ((USItype)(v)))
 # else
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("multu %2,%3 \n" \
 	   "mflo %0 \n"     \
 	   "mfhi %1"                                                    \
 	   : "=d" ((USItype)(w0)),                                      \
 	     "=d" ((USItype)(w1))                                       \
 	   : "d" ((USItype)(u)),                                        \
 	     "d" ((USItype)(v)))
 # endif
 /* Relative cost estimates for multiply and divide.  */
 # define UMUL_TIME 10
 # define UDIV_TIME 100
 #endif /* __mips__ */
 
 /***************************************
  **************  MIPS/64  **************
  ***************************************/
 /* 64-bit MIPS (__mips >= 3).  Same three-way selection as the 32-bit
    section, but with 64-bit words: a C multiply in a 128-bit UTItype,
    "dmultu" with the "l"/"h" constraints, or "dmultu" followed by
    explicit mflo/mfhi.  */
 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
 # if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \
                                                __GNUC_MINOR__ >= 4)
 /* 128-bit unsigned integer type used to hold the full product.  */
 typedef unsigned int UTItype __attribute__ ((mode (TI)));
 #  define umul_ppmm(w1, w0, u, v) \
   do {                                                                  \
     UTItype __r;                                                        \
     __r = (UTItype)(u) * (v);                                           \
     (w1) = __r >> 64;                                                   \
     (w0) = (UDItype) __r;                                               \
   } while (0)
 # elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmultu %2,%3"                                               \
 	   : "=l" ((UDItype)(w0)),                                      \
 	     "=h" ((UDItype)(w1))                                       \
 	   : "d" ((UDItype)(u)),                                        \
 	     "d" ((UDItype)(v)))
 # else
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmultu %2,%3 \n"    \
 	   "mflo %0 \n"         \
 	   "mfhi %1"                                                    \
 	   : "=d" ((UDItype)(w0)),                                      \
 	     "=d" ((UDItype)(w1))                                       \
 	   : "d" ((UDItype)(u)),                                        \
 	     "d" ((UDItype)(v)))
 # endif
 # define UMUL_TIME 20
 # define UDIV_TIME 140
 #endif /* __mips >= 3 (64-bit) */
 
 
 /***************************************
  **************  32000	****************
  ***************************************/
 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 /* Multiply u by v with "meid" into a 64-bit union, then store its __h
    half to w1 and its __l half to w0.  Note the struct lists __l before
    __h here.  */
 # define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __xx;							\
   __asm__ ("meid %2,%0"                                                 \
 	   : "=g" (__xx.__ll)                                           \
 	   : "%0" ((USItype)(u)),                                       \
 	     "g" ((USItype)(v)));                                       \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 /* Expression form of the same "meid" multiply: evaluates to the whole
    UDItype product of u and v.  */
 # define __umulsidi3(u, v) \
   ({UDItype __w;							\
     __asm__ ("meid %2,%0"                                               \
 	     : "=g" (__w)                                               \
 	     : "%0" ((USItype)(u)),                                     \
 	       "g" ((USItype)(v)));                                     \
     __w; })
 /* Divide the two-word value (n1,n0) by d with "deid", operating in place
    on the 64-bit union.  Afterwards the __l half is stored to r and the
    __h half to q.  */
 # define udiv_qrnnd(q, r, n1, n0, d) \
   ({union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __xx;							\
   __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
   __asm__ ("deid %2,%0"                                                 \
 	   : "=g" (__xx.__ll)                                           \
 	   : "0" (__xx.__ll),                                           \
 	     "g" ((USItype)(d)));                                       \
   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 /* Compute count from x with the "ffsd" instruction; the output operand
    is tied to an input that is preloaded with 0.
    Every line of the body must end in a backslash: without one after
    "do {" the macro would expand to just "do {" and the asm statement
    would be left as stray text at file scope.  */
 # define count_trailing_zeros(count,x) \
   do {									\
     __asm__ ("ffsd      %2,%0"                                          \
 	     : "=r" ((USItype) (count))                                 \
 	     : "0" ((USItype) 0),                                       \
 	       "r" ((USItype) (x)));                                    \
   } while (0)
 #endif /* __ns32000__ */
 
 
 /***************************************
  **************  PPC  ******************
  ***************************************/
 /* Powerpc 32 bit support taken from GCC longlong.h. */
 #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 32
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
               __CLOBBER_CC);						\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
               : "=r" (sh), "=&r" (sl)					\
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
               __CLOBBER_CC);						\
   } while (0)
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else								\
       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
               : "=r" (sh), "=&r" (sl)					\
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)			\
               __CLOBBER_CC);						\
   } while (0)
 # define count_leading_zeros(count, x) \
   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
 # define COUNT_LEADING_ZEROS_0 32
 # define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
     (pl) = __m0 * __m1;							\
   } while (0)
 # define UMUL_TIME 15
 # define smul_ppmm(ph, pl, m0, m1) \
   do {									\
     SItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
     (pl) = __m0 * __m1;							\
   } while (0)
 # define SMUL_TIME 14
 # define UDIV_TIME 120
 #endif /* 32-bit POWER architecture variants.  */
 
 /* Powerpc 64 bit support taken from GCC longlong.h. */
 /* We should test _IBMR2 here when we add assembly support for the system
    vendor compilers.  */
 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
               __CLOBBER_CC);						\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
               : "=r" (sh), "=&r" (sl)					\
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
               __CLOBBER_CC);						\
   } while (0)
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
               __CLOBBER_CC);						\
     else								\
       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
               : "=r" (sh), "=&r" (sl)					\
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)			\
               __CLOBBER_CC);						\
   } while (0)
 # define count_leading_zeros(count, x) \
   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 # define COUNT_LEADING_ZEROS_0 64
 # define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     UDItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
     (pl) = __m0 * __m1;							\
   } while (0)
 # define UMUL_TIME 15
 # define smul_ppmm(ph, pl, m0, m1) \
   do {									\
     DItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
     (pl) = __m0 * __m1;							\
   } while (0)
 # define SMUL_TIME 14  /* ??? */
 # define UDIV_TIME 120 /* ??? */
 #endif /* 64-bit PowerPC.  */
 
 /***************************************
  **************  PYR  ******************
  ***************************************/
 #if defined (__pyr__) && W_TYPE_SIZE == 32
 /* Two-word add: "addw" on the low words, then "addwc" on the high
    words.  The outputs are tied to ah/al through the "%0"/"%1" matching
    constraints, so the addition happens in place.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addw        %5,%1 \n" \
 	   "addwc	%3,%0"                                          \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "g" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "g" ((USItype)(bl)))
 /* Two-word subtract: "subw" on the low words, then "subwb" on the high
    words, again operating in place on the registers holding ah/al.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subw        %5,%1 \n" \
 	   "subwb	%3,%0"                                          \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "0" ((USItype)(ah)),                                       \
 	     "g" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "g" ((USItype)(bl)))
 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
 /* Multiply u by v: u is first moved into part of the 64-bit operand
    (%R0), then "uemul" multiplies by v.  The __h half of the union goes
    to w1 and the __l half to w0.  */
 # define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;						\
 	   struct {USItype __h, __l;} __i;				\
 	  } __xx;							\
   __asm__ ("movw %1,%R0 \n" \
 	   "uemul %2,%0"                                                \
 	   : "=&r" (__xx.__ll)                                          \
 	   : "g" ((USItype) (u)),                                       \
 	     "g" ((USItype)(v)));                                       \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 #endif /* __pyr__ */
 
 
 /***************************************
  **************  RT/ROMP  **************
  ***************************************/
 #if defined (__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
 /* Two-word add: "a" on the low words, then "ae" on the high words,
    operating in place on the registers holding ah/al.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("a %1,%5 \n" \
 	   "ae %0,%3"                                                   \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "r" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "r" ((USItype)(bl)))
 /* Two-word subtract: "s" on the low words, then "se" on the high
    words, operating in place on the registers holding ah/al.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("s %1,%5\n" \
 	   "se %0,%3"                                                   \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "0" ((USItype)(ah)),                                       \
 	     "r" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "r" ((USItype)(bl)))
 /* Multiply via sixteen "m" (multiply step) instructions, using r2 as
    scratch (declared as clobbered) and r10 as the multiplier register.
    The C statement after the asm then adds a correction to ph built from
    the sign bits of the operands; presumably this converts a signed
    product into the unsigned one -- confirm against the ROMP manual.  */
 # define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ (								\
        "s       r2,r2    \n" \
        "mts	r10,%2   \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "m	r2,%3    \n" \
        "cas	%0,r2,r0 \n" \
        "mfs	r10,%1"                                                 \
 	     : "=r" ((USItype)(ph)),                                    \
 	       "=r" ((USItype)(pl))                                     \
 	     : "%r" (__m0),                                             \
 		"r" (__m1)                                              \
 	     : "r2");                                                   \
     (ph) += ((((SItype) __m0 >> 31) & __m1)				\
 	     + (((SItype) __m1 >> 31) & __m0)); 			\
   } while (0)
 # define UMUL_TIME 20
 # define UDIV_TIME 200
 /* Leading-zero count built from "clz" in two halves: if x has any of
    its upper 16 bits set, clz is applied to x >> 16; otherwise clz is
    applied to x itself and 16 is added to the result.  */
 # define count_leading_zeros(count, x) \
   do {									\
     if ((x) >= 0x10000) 						\
       __asm__ ("clz     %0,%1"                                          \
 	       : "=r" ((USItype)(count))                                \
 	       : "r" ((USItype)(x) >> 16));                             \
     else								\
       { 								\
 	__asm__ ("clz   %0,%1"                                          \
 		 : "=r" ((USItype)(count))                              \
 		 : "r" ((USItype)(x)));                                 \
 	(count) += 16;							\
       } 								\
   } while (0)
 #endif /* RT/ROMP */
 
 
 /***************************************
  **************  SH2  ******************
  ***************************************/
 /* SH2/SH3/SH4: umul_ppmm via "dmulu.l".  The product is read back with
    "sts" from mach into w1 and from macl into w0; both MAC registers are
    listed as clobbered.  */
 #if (defined (__sh2__) || defined(__sh3__) || defined(__SH4__) ) \
     && W_TYPE_SIZE == 32
 # define umul_ppmm(w1, w0, u, v) \
   __asm__ (								\
         "dmulu.l %2,%3\n"  \
 	"sts	macl,%1\n" \
 	"sts	mach,%0"                                                \
 	   : "=r" ((USItype)(w1)),                                      \
 	     "=r" ((USItype)(w0))                                       \
 	   : "r" ((USItype)(u)),                                        \
 	     "r" ((USItype)(v))                                         \
 	   : "macl", "mach")
 # define UMUL_TIME 5
 #endif
 
 /***************************************
  **************  SPARC	****************
  ***************************************/
 #if defined (__sparc__) && W_TYPE_SIZE == 32
 /* Two-word add: "addcc" on the low words (al, bl -> sl) followed by
    "addx" on the high words (ah, bh -> sh).  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addcc %r4,%5,%1\n" \
 	   "addx %r2,%3,%0"                                             \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "%rJ" ((USItype)(ah)),                                     \
 	     "rI" ((USItype)(bh)),                                      \
 	     "%rJ" ((USItype)(al)),                                     \
 	     "rI" ((USItype)(bl))                                       \
 	   __CLOBBER_CC)
 /* Two-word subtract: "subcc" on the low words (al, bl -> sl) followed
    by "subx" on the high words (ah, bh -> sh).  Unlike the add, the
    input constraints carry no "%" commutativity marker.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subcc %r4,%5,%1\n" \
 	   "subx %r2,%3,%0"                                             \
 	   : "=r" ((USItype)(sh)),                                      \
 	     "=&r" ((USItype)(sl))                                      \
 	   : "rJ" ((USItype)(ah)),                                      \
 	     "rI" ((USItype)(bh)),                                      \
 	     "rJ" ((USItype)(al)),                                      \
 	     "rI" ((USItype)(bl))                                       \
 	   __CLOBBER_CC)
 # if defined (__sparc_v8__) || defined(__sparcv8)
 /* Don't match immediate range because, 1) it is not often useful,
    2) the 'I' flag thinks of the range as a 13 bit signed interval,
    while we want to match a 13 bit interval, sign extended to 32 bits,
    but INTERPRETED AS UNSIGNED.  */
 /* SPARC v8: "umul" writes its result to w0, and w1 is then read from
    the %y register with "rd".  */
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
 	   : "=r" ((USItype)(w1)),                                      \
 	     "=r" ((USItype)(w0))                                       \
 	   : "r" ((USItype)(u)),                                        \
 	     "r" ((USItype)(v)))
 #  define UMUL_TIME 5
 #  ifndef SUPERSPARC	/* SuperSPARC's udiv only handles 53 bit dividends */
 /* SPARC v8 (non-SuperSPARC): n1 is moved into %y, three nops follow,
    and "udiv" then divides by d using n0 as its register operand.  The
    remainder is recomputed in C as n0 - q*d.  */
 #   define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     USItype __q;							\
     __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"                     \
 	     : "=r" ((USItype)(__q))                                    \
 	     : "r" ((USItype)(n1)),                                     \
 	       "r" ((USItype)(n0)),                                     \
 	       "r" ((USItype)(d)));                                     \
     (r) = (n0) - __q * (d);						\
     (q) = __q;								\
   } while (0)
 #   define UDIV_TIME 25
 #  endif /*!SUPERSPARC */
 # else /* ! __sparc_v8__ */
 #  if defined (__sparclite__)
 /* This has hardware multiply but not divide.  It also has two additional
    instructions scan (ffs from high bit) and divscc.  */
 /* sparclite: same sequence as the v8 version -- "umul" writes w0 and
    w1 is read from the %y register with "rd".  */
 #   define umul_ppmm(w1, w0, u, v) \
   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
 	   : "=r" ((USItype)(w1)),                                      \
 	     "=r" ((USItype)(w0))                                       \
 	   : "r" ((USItype)(u)),                                        \
 	     "r" ((USItype)(v)))
 #   define UMUL_TIME 5
 /* sparclite division by repeated "divscc" steps.  n1 is written to %y,
    then 32 divscc instructions run against d, accumulating in %g1 with
    the last one writing q.  The remainder is read from %y into r, and
    the annulled "bl,a" branch adds d back to r when taken.  %g1 and the
    condition codes are clobbered.  */
 #   define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("! Inlined udiv_qrnnd                                     \n" \
  "        wr	%%g0,%2,%%y	! Not a delayed write for sparclite  \n" \
  "        tst	%%g0                                                 \n" \
  "        divscc	%3,%4,%%g1                                   \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%%g1                                 \n" \
  "        divscc	%%g1,%4,%0                                   \n" \
  "        rd	%%y,%1                                               \n" \
  "        bl,a 1f                                                    \n" \
  "        add	%1,%4,%1                                             \n" \
  "1:	! End of inline udiv_qrnnd"                                     \
 	   : "=r" ((USItype)(q)),                                       \
 	     "=r" ((USItype)(r))                                        \
 	   : "r" ((USItype)(n1)),                                       \
 	     "r" ((USItype)(n0)),                                       \
 	     "rI" ((USItype)(d))                                        \
 	   : "%g1" __AND_CLOBBER_CC)
 #   define UDIV_TIME 37
 /* Store into count the result of the sparclite "scan" instruction
    applied to x.  count is the asm output operand and x the input; the
    macro must never write to x.  */
 #   define count_leading_zeros(count, x) \
   __asm__ ("scan %1,0,%0"                                               \
 	   : "=r" ((USItype)(count))                                    \
 	   : "r" ((USItype)(x)))
 /* Early sparclites return 63 for an argument of 0, but they warn that future
    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
    undefined.  */
 #  endif /* !__sparclite__ */
 # endif /* !__sparc_v8__ */
 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
 # ifndef umul_ppmm
 /* Fallback multiply for SPARCs without a usable "umul": u is written to
    %y, then 32 "mulscc" steps against v accumulate in %g1, followed by a
    final mulscc with 0.  %g2 holds (v >> 31 arithmetic) & u and is added
    to %g1 to form w1; w0 is read back from %y.  %g1, %g2 and the
    condition codes are clobbered.  The instructions marked "Don't move
    this insn" fill the delay after the write to %y.  */
 #  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("! Inlined umul_ppmm                                        \n" \
  "        wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr  \n" \
  "        sra	%3,31,%%g2	! Don't move this insn                 \n" \
  "        and	%2,%%g2,%%g2	! Don't move this insn                 \n" \
  "        andcc	%%g0,0,%%g1	! Don't move this insn                 \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,%3,%%g1                                   \n" \
  "        mulscc	%%g1,0,%%g1                                    \n" \
  "        add	%%g1,%%g2,%0                                           \n" \
  "        rd	%%y,%1"                                                 \
 	   : "=r" ((USItype)(w1)),                                      \
 	     "=r" ((USItype)(w0))                                       \
 	   : "%rI" ((USItype)(u)),                                      \
 	     "r" ((USItype)(v))                                         \
 	   : "%g1", "%g2" __AND_CLOBBER_CC)
 #  define UMUL_TIME 39		/* 39 instructions */
 # endif /* umul_ppmm */
 # ifndef udiv_qrnnd
 #  ifndef LONGLONG_STANDALONE
 /* Out-of-line fallback: call the external __udiv_qrnnd routine, which
    returns the quotient and stores the remainder through its first
    (pointer) argument.  Only defined when LONGLONG_STANDALONE is not
    set.  */
 #   define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;							\
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); 			\
     (r) = __r;								\
   } while (0)
 extern USItype __udiv_qrnnd ();
 #   define UDIV_TIME 140
 #  endif /* LONGLONG_STANDALONE */
 # endif /* udiv_qrnnd */
 #endif /* __sparc__ */
 
 
 /***************************************
  **************  VAX  ******************
  ***************************************/
 #if defined (__vax__) && W_TYPE_SIZE == 32
 /* Two-word add: "addl2" on the low words, then "adwc" on the high
    words, operating in place on the operands holding ah/al.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addl2 %5,%1\n" \
 	   "adwc %3,%0"                                                 \
 	   : "=g" ((USItype)(sh)),                                      \
 	     "=&g" ((USItype)(sl))                                      \
 	   : "%0" ((USItype)(ah)),                                      \
 	     "g" ((USItype)(bh)),                                       \
 	     "%1" ((USItype)(al)),                                      \
 	     "g" ((USItype)(bl)))
 /* Two-word subtract: "subl2" on the low words, then "sbwc" on the high
    words, operating in place on the operands holding ah/al.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subl2 %5,%1\n" \
 	   "sbwc %3,%0"                                                 \
 	   : "=g" ((USItype)(sh)),                                      \
 	     "=&g" ((USItype)(sl))                                      \
 	   : "0" ((USItype)(ah)),                                       \
 	     "g" ((USItype)(bh)),                                       \
 	     "1" ((USItype)(al)),                                       \
 	     "g" ((USItype)(bl)))
 /* Multiply with "emul" into a 64-bit union, split it into xh/xl, then
    add a correction to xh built from the sign bits of the operands;
    presumably this converts emul's signed product into the unsigned
    one -- confirm against the VAX architecture reference.  */
 # define umul_ppmm(xh, xl, m0, m1) \
   do {									\
     union {UDItype __ll;						\
 	   struct {USItype __l, __h;} __i;				\
 	  } __xx;							\
     USItype __m0 = (m0), __m1 = (m1);					\
     __asm__ ("emul %1,%2,$0,%0"                                         \
 	     : "=g" (__xx.__ll)                                         \
 	     : "g" (__m0),                                              \
 	       "g" (__m1));                                             \
     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
     (xh) += ((((SItype) __m0 >> 31) & __m1)				\
 	     + (((SItype) __m1 >> 31) & __m0)); 			\
   } while (0)
 /* Signed division of the two-word value (n1,n0) by d with "ediv",
    writing the quotient to q and the remainder to r.  */
 # define sdiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     union {DItype __ll; 						\
 	   struct {SItype __l, __h;} __i;				\
 	  } __xx;							\
     __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
     __asm__ ("ediv %3,%2,%0,%1"                                         \
 	     : "=g" (q), "=g" (r)                                       \
 	     : "g" (__xx.__ll), "g" (d));                               \
   } while (0)
 #endif /* __vax__ */
 
 
 /***************************************
  **************  Z8000	****************
  ***************************************/
 /* Z8000, 16-bit words: the operands are handled as unsigned int.  */
 #if defined (__z8000__) && W_TYPE_SIZE == 16
 /* Two-word add: "add" on the low words, then "adc" on the high words,
    operating in place on the registers holding ah/al.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
 	   : "=r" ((unsigned int)(sh)),                                 \
 	     "=&r" ((unsigned int)(sl))                                 \
 	   : "%0" ((unsigned int)(ah)),                                 \
 	     "r" ((unsigned int)(bh)),                                  \
 	     "%1" ((unsigned int)(al)),                                 \
 	     "rQR" ((unsigned int)(bl)))
 /* Two-word subtract: "sub" on the low words, then "sbc" on the high
    words, operating in place on the registers holding ah/al.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
 	   : "=r" ((unsigned int)(sh)),                                 \
 	     "=&r" ((unsigned int)(sl))                                 \
 	   : "0" ((unsigned int)(ah)),                                  \
 	     "r" ((unsigned int)(bh)),                                  \
 	     "1" ((unsigned int)(al)),                                  \
 	     "rQR" ((unsigned int)(bl)))
 /* Multiply with "mult", split the result into xh/xl, then add a
    correction to xh built from bit 15 (the sign bit) of each operand;
    presumably this converts a signed product into the unsigned one --
    confirm against the Z8000 manual.  */
 # define umul_ppmm(xh, xl, m0, m1) \
   do {									\
     union {long int __ll;						\
 	   struct {unsigned int __h, __l;} __i; 			\
 	  } __xx;							\
     unsigned int __m0 = (m0), __m1 = (m1);				\
     __asm__ ("mult      %S0,%H3"                                        \
 	     : "=r" (__xx.__i.__h),                                     \
 	       "=r" (__xx.__i.__l)                                      \
 	     : "%1" (__m0),                                             \
 	       "rQR" (__m1));                                           \
     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
     (xh) += ((((signed int) __m0 >> 15) & __m1) 			\
 	     + (((signed int) __m1 >> 15) & __m0));			\
   } while (0)
 #endif /* __z8000__ */
 
 
 /***************************************
  *********** s390x/zSeries  ************
  ***************************************/
 #if defined (__s390x__) && W_TYPE_SIZE == 64 && __GNUC__ >= 4
 /* Two-word add, 64-bit words: "algr" on the low words, then "alcgr" on
    the high words, operating in place on the registers holding ah/al.  */
 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("algr %1,%5\n"                                               \
 	   "alcgr %0,%3\n"                                              \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "0" ((UDItype)(ah)),                                       \
 	     "r"  ((UDItype)(bh)),                                      \
 	     "1" ((UDItype)(al)),                                       \
 	     "r"  ((UDItype)(bl))                                       \
 	   __CLOBBER_CC)
 /* Two-word subtract, 64-bit words: "slgr" on the low words, then
    "slbgr" on the high words, operating in place on the registers
    holding ah/al.  */
 # define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("slgr %1,%5\n"                                               \
 	   "slbgr %0,%3\n"                                              \
 	   : "=r" ((sh)),                                               \
 	     "=&r" ((sl))                                               \
 	   : "0" ((UDItype)(ah)),                                       \
 	     "r" ((UDItype)(bh)),                                       \
 	     "1" ((UDItype)(al)),                                       \
 	     "r" ((UDItype)(bl))                                        \
 	   __CLOBBER_CC)
 /* 128-bit unsigned integer type used as the operand of mlgr/dlgr.  */
 typedef unsigned int UTItype __attribute__ ((mode (TI)));
 /* Multiply u by v with "mlgr" into the 128-bit ___r; the upper 64 bits
    go to w1 and the lower 64 bits to w0.  */
 #  define umul_ppmm(w1, w0, u, v) \
   do {                                                                  \
     UTItype ___r;                                                       \
     __asm__ ("mlgr %0,%2"                                               \
 	     : "=r" (___r)                                              \
 	     : "0" ((UDItype)(u)),                                      \
 	       "r" ((UDItype)(v)));                                     \
     (w1) = ___r >> 64;                                                  \
     (w0) = (UDItype) ___r;                                              \
   } while (0)
 /* 128 / 64 bit unsigned divide: q = (n1:n0) / d, r = (n1:n0) % d.
    The two-word numerator is assembled in a 128-bit temporary, "dlgr"
    divides it by d in place, and afterwards the upper 64 bits hold the
    remainder and the lower 64 bits the quotient.
    n1 and n0 are parenthesized and n0 is cast to UDItype so that expression
    arguments and narrower/signed arguments combine correctly.
    NOTE(review): presumably requires n1 < d so the quotient fits in 64 bits;
    confirm against the callers.  */
 # define udiv_qrnnd(q, r, n1, n0, d) \
   do {                                                                  \
     UTItype ___r = ((UTItype)(n1) << 64) | (UDItype)(n0);               \
     __asm__ ("dlgr %0,%2"                                               \
 	     : "=r" (___r)                                              \
 	     : "0" (___r),                                              \
 	       "r" ((UDItype)(d)));                                     \
     (r) = ___r >> 64;                                                   \
     (q) = (UDItype) ___r;                                               \
   } while (0)
 #endif /* __s390x__ */
 
 
 /***************************************
  *****  End CPU Specific Versions  *****
  ***************************************/
 
 #endif /* __GNUC__ */
 #endif /* !__riscos__ */
 
 
 /***************************************
  ***********  Generic Versions	********
  ***************************************/
 #if !defined (umul_ppmm) && defined (__umulsidi3)
 /* Word x word -> double-word multiply built on __umulsidi3:
    (ph:pl) = m0 * m1.
    Wrapped in do { } while (0) so that the macro behaves as a single
    statement (e.g. in an unbraced if/else); the outputs are parenthesized
    so that any lvalue expression can be passed.  */
 #  define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     UDWtype __ll = __umulsidi3 (m0, m1);				\
     (ph) = (UWtype) (__ll >> W_TYPE_SIZE);				\
     (pl) = (UWtype) __ll;						\
   } while (0)
 #endif
 
 #if !defined (__umulsidi3)
 /* Word x word -> double-word multiply expressed through umul_ppmm.
    Uses a GCC statement expression; its value is the double word
    (__hi:__lo) produced by umul_ppmm (u, v).  */
 #  define __umulsidi3(u, v) \
   ({UWtype __hi, __lo;							\
     umul_ppmm (__hi, __lo, u, v);					\
     ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
 #endif
 
 /* If this machine has no inline assembler, use C macros.  */
 
 #if !defined (add_ssaaaa) && defined (UDWtype)
 /* Double-word variant: assemble both operands as (high << W_TYPE_SIZE) | low,
    add them in one operation and split the sum back into (sh:sl).  */
 #  define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     UDWtype __audw = (UDWtype)(ah) << W_TYPE_SIZE;			\
     UDWtype __budw = (UDWtype)(bh) << W_TYPE_SIZE;			\
     __audw |= (al);							\
     __budw |= (bl);							\
     __audw += __budw;							\
     (sh) = (UWtype)(__audw >> W_TYPE_SIZE);				\
     (sl) = (UWtype)(__audw);						\
   } while (0)
 #elif !defined (add_ssaaaa)
 /* Single-word variant: add the low words first; the sum wrapped around
    exactly when it is smaller than al, and that carry goes into sh.  */
 #  define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     UWtype __x = (al) + (bl);						\
     (sh) = (ah) + (bh) + (__x < (al));					\
     (sl) = __x;								\
   } while (0)
 #endif
 
 #if !defined (sub_ddmmss) && defined (UDWtype)
 /* Double-word variant: assemble both operands as (high << W_TYPE_SIZE) | low,
    subtract in one operation and split the difference back into (sh:sl).  */
 #  define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     UDWtype __audw = (UDWtype)(ah) << W_TYPE_SIZE;			\
     UDWtype __budw = (UDWtype)(bh) << W_TYPE_SIZE;			\
     __audw |= (al);							\
     __budw |= (bl);							\
     __audw -= __budw;							\
     (sh) = (UWtype)(__audw >> W_TYPE_SIZE);				\
     (sl) = (UWtype)(__audw);						\
   } while (0)
 #elif !defined (sub_ddmmss)
 /* Single-word variant: subtract the low words first; a borrow occurred
    exactly when the difference is larger than al, and it is taken from sh.  */
 #  define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     UWtype __x = (al) - (bl);						\
     (sh) = (ah) - (bh) - (__x > (al));					\
     (sl) = __x;								\
   } while (0)
 #endif
 
 #if !defined (umul_ppmm) && defined (UDWtype)
 /* Double-word variant: (w1:w0) = u * v, computed by widening u to UDWtype,
    multiplying, and splitting the product into its two words.  */
 #  define umul_ppmm(w1, w0, u, v) 					\
   do {									\
     UDWtype __x = (u);							\
     __x *= (v);								\
     (w1) = (UWtype)(__x >> W_TYPE_SIZE);				\
     (w0) = (UWtype)(__x);						\
   } while (0)
 #elif !defined (umul_ppmm)
 /* Single-word variant: (w1:w0) = u * v via four half-word products.
    Both operands are split with __ll_lowpart/__ll_highpart (defined
    elsewhere), the partial products __x0..__x3 are formed, the two middle
    products are summed with the carry moved into __x3 (__ll_B, defined
    elsewhere, is added for it), and the result words are recombined.  */
 #  define umul_ppmm(w1, w0, u, v) 					\
   do {									\
     UWtype __x0, __x1, __x2, __x3;					\
     UHWtype __ul, __vl, __uh, __vh;					\
     UWtype __u = (u), __v = (v);					\
 									\
     __ul = __ll_lowpart (__u);						\
     __uh = __ll_highpart (__u); 					\
     __vl = __ll_lowpart (__v);						\
     __vh = __ll_highpart (__v); 					\
 									\
     __x0 = (UWtype) __ul * __vl;					\
     __x1 = (UWtype) __ul * __vh;					\
     __x2 = (UWtype) __uh * __vl;					\
     __x3 = (UWtype) __uh * __vh;					\
 									\
     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
     __x1 += __x2;		/* but this indeed can */		\
     if (__x1 < __x2)		/* did we get it? */			\
       __x3 += __ll_B;		/* yes, add it in the proper pos. */	\
 									\
     (w1) = __x3 + __ll_highpart (__x1); 				\
     (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\
   } while (0)
 #endif
 
 #if !defined (smul_ppmm)
 /* Signed word x word -> double-word multiply derived from umul_ppmm.
    The operands are multiplied as unsigned values; the high word is then
    corrected by subtracting __m1 when the top bit of __m0 is set and __m0
    when the top bit of __m1 is set (the -(x >> (W_TYPE_SIZE - 1)) term is an
    all-ones mask in that case and zero otherwise).  */
 #  define smul_ppmm(w1, w0, u, v) 					\
   do {									\
     UWtype __w1;							\
     UWtype __m0 = (u), __m1 = (v);					\
     umul_ppmm (__w1, w0, __m0, __m1);					\
     (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) 		\
 		- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0);		\
   } while (0)
 #endif
 
 /* Define this unconditionally, so it can be used for debugging.  */
 /* Portable two-word by one-word division: q = (n1:n0) / d, r = (n1:n0) % d.
    The quotient is produced as two half-word digits.  For each digit the
    current remainder is divided by the high half of d (__d1) to get an
    estimate, the estimate times the low half of d (__d0) is compared with
    the remainder extended by the next half-word of n0, and the estimate is
    decremented at most twice (adding d back each time) before the product
    is subtracted.
    Note that d, n1 and n0 are evaluated more than once.
    NOTE(review): presumably requires a normalized d (top bit set) and
    n1 < d; the fallback that maps udiv_qrnnd to this macro sets
    UDIV_NEEDS_NORMALIZATION to 1.  Confirm against the callers.  */
 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
   do {									\
     UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
     __d1 = __ll_highpart (d);						\
     __d0 = __ll_lowpart (d);						\
 									\
     __r1 = (n1) % __d1; 						\
     __q1 = (n1) / __d1; 						\
     __m = (UWtype) __q1 * __d0; 					\
     __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
     if (__r1 < __m)							\
       { 								\
 	__q1--, __r1 += (d);						\
 	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
 	  if (__r1 < __m)						\
 	    __q1--, __r1 += (d);					\
       } 								\
     __r1 -= __m;							\
 									\
     __r0 = __r1 % __d1; 						\
     __q0 = __r1 / __d1; 						\
     __m = (UWtype) __q0 * __d0; 					\
     __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
     if (__r0 < __m)							\
       { 								\
 	__q0--, __r0 += (d);						\
 	if (__r0 >= (d))						\
 	  if (__r0 < __m)						\
 	    __q0--, __r0 += (d);					\
       } 								\
     __r0 -= __m;							\
 									\
     (q) = (UWtype) __q1 * __ll_B | __q0;				\
     (r) = __r0; 							\
   } while (0)
 
 /* With a double-word type at hand, divide directly: form the two-word
    numerator (nh:nl) and let the compiler compute quotient and remainder.  */
 #if !defined (udiv_qrnnd) && defined (UDWtype)
 #  define udiv_qrnnd(q, r, nh, nl, d) \
   do {									\
     UWtype __d = (d);							\
     UDWtype __nudw = ((UDWtype)(nh) << W_TYPE_SIZE) | (nl);		\
     (q) = (UWtype)(__nudw / __d);					\
     (r) = (UWtype)(__nudw % __d);					\
   } while (0)
 #endif
 
 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
 /* The helper is reached through the __MPN() name wrapper (defined
    elsewhere); it returns the quotient and stores the remainder through
    its first argument.  */
 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
 #  define udiv_qrnnd(q, r, nh, nl, d) \
   do {									\
     UWtype __r; 							\
     (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); 			\
     (r) = __r;								\
   } while (0)
 #endif
 
 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
 /* UDIV_NEEDS_NORMALIZATION is set to 1 together with this fallback so that
    users of udiv_qrnnd can tell that the C version is in use.  */
 #if !defined (udiv_qrnnd)
 #  define UDIV_NEEDS_NORMALIZATION 1
 #  define udiv_qrnnd __udiv_qrnnd_c
 #endif
 
 /* If no CPU specific count_leading_zeros exists, use the compiler builtin
    whose operand width matches W_TYPE_SIZE (long first, then int).
    COUNT_LEADING_ZEROS_0 is removed in that case because the builtin gives
    no defined result for x == 0.  */
 #if !defined (count_leading_zeros)
 #  if defined (HAVE_BUILTIN_CLZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE
 #    define count_leading_zeros(count, x) (count = __builtin_clzl(x))
 #    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
 #  elif defined (HAVE_BUILTIN_CLZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE
 #    define count_leading_zeros(count, x) (count = __builtin_clz(x))
 #    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
 #  endif
 #endif
 
 /* If no CPU specific count_trailing_zeros exists, use the compiler builtin
    whose operand width matches W_TYPE_SIZE (long first, then int).
    The builtin gives no defined result for x == 0.  COUNT_LEADING_ZEROS_0
    is deliberately left alone here: it describes count_leading_zeros, whose
    implementation is not changed by this selection.  */
 #if !defined (count_trailing_zeros)
 #  if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE
 #    define count_trailing_zeros(count, x) (count = __builtin_ctzl(x))
 #  elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE
 #    define count_trailing_zeros(count, x) (count = __builtin_ctz(x))
 #  endif
 #endif
 
 /* Table driven count_leading_zeros for machines without an asm or builtin
    version.  A shift amount __a is chosen first: for words of up to 32 bits
    by comparing x against powers of two spaced __BITS4 bits apart (__BITS4
    is defined elsewhere), otherwise by scanning x in 8-bit steps from the
    top until a non-zero byte is found.  The remaining high part x >> __a
    indexes _gcry_clz_tab, and the table value plus __a is subtracted from
    W_TYPE_SIZE.
    NOTE(review): _gcry_clz_tab presumably holds the bit length of its index;
    confirm against its definition.  */
 #if !defined (count_leading_zeros)
 extern
 #  ifdef __STDC__
 const
 #  endif
 unsigned char _gcry_clz_tab[];
 #  define MPI_INTERNAL_NEED_CLZ_TAB 1
 #  define count_leading_zeros(count, x) \
   do {									\
     UWtype __xr = (x);							\
     UWtype __a; 							\
 									\
     if (W_TYPE_SIZE <= 32)						\
       { 								\
 	__a = __xr < ((UWtype) 1 << 2*__BITS4)				\
 	  ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4)		\
 	  : (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4);\
       } 								\
     else								\
       { 								\
 	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
 	  if (((__xr >> __a) & 0xff) != 0)				\
 	    break;							\
       } 								\
 									\
     (count) = W_TYPE_SIZE - (_gcry_clz_tab[__xr >> __a] + __a);		\
   } while (0)
 /* This version gives a well-defined value for zero. */
 #  define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
 #endif /* !count_leading_zeros */
 
 #if !defined (count_trailing_zeros)
 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
    defined in asm, but if it is not, the C version above is good enough.  */
 /* x & -x keeps only the lowest set bit of x; the number of trailing zeros
    is that bit's index, i.e. W_TYPE_SIZE - 1 minus its leading zero count.  */
 #  define count_trailing_zeros(count, x) \
   do {									\
     UWtype __ctz_x = (x);						\
     UWtype __ctz_c;							\
     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
     (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
   } while (0)
 #endif /* !count_trailing_zeros */
 
 /* Default: the selected udiv_qrnnd did not ask for a normalized divisor.  */
 #ifndef UDIV_NEEDS_NORMALIZATION
 #  define UDIV_NEEDS_NORMALIZATION 0
 #endif
 
 /***************************************
  ******  longlong.h ends here  *********
  ***************************************/
diff --git a/mpi/m68k/mc68020/mpih-mul1.S b/mpi/m68k/mc68020/mpih-mul1.S
index 007c94c6..1b55097a 100644
--- a/mpi/m68k/mc68020/mpih-mul1.S
+++ b/mpi/m68k/mc68020/mpih-mul1.S
@@ -1,104 +1,104 @@
 /* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
  *                        the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(sp + 4)
  *		  mpi_ptr_t s1_ptr,	(sp + 8)
  *		  mpi_size_t s1_size,	(sp + 12)
  *		  mpi_limb_t s2_limb)	(sp + 16)
  */
 
 
 /* _gcry_mpih_mul_1: multiply the s1_size limbs at s1_ptr by s2_limb and
    store the low word of each product, plus the high word carried over from
    the previous limb, at res_ptr.  The final carry limb is left in d0.  */
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_mul_1)
 
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 PROLOG(_gcry_mpih_mul_1)
 
 #define res_ptr a0
 #define s1_ptr a1
 #define s1_size d2
 #define s2_limb d4
 
 /* Save used registers on the stack.  */
 	moveml	R(d2)-R(d4),MEM_PREDEC(sp)
 /* Disabled register-by-register equivalent of the moveml above.  */
 #if 0
 	movel	R(d2),MEM_PREDEC(sp)
 	movel	R(d3),MEM_PREDEC(sp)
 	movel	R(d4),MEM_PREDEC(sp)
 #endif
 
 /* Copy the arguments to registers.  Better use movem?	*/
 /* Three registers (12 bytes) were pushed, so the first argument is now
    found at sp + 16 instead of sp + 4.  */
 	movel	MEM_DISP(sp,16),R(res_ptr)
 	movel	MEM_DISP(sp,20),R(s1_ptr)
 	movel	MEM_DISP(sp,24),R(s1_size)
 	movel	MEM_DISP(sp,28),R(s2_limb)
 
 /* The loop below is unrolled twice.  Turn the limb count into a dbf counter
    and enter in the middle (L1) when the count is odd; d1, the carry-in
    limb for that entry, is cleared.  */
 	eorw	#1,R(s1_size)
 	clrl	R(d1)
 	lsrl	#1,R(s1_size)
 	bcc	L(L1)
 	subql	#1,R(s1_size)
 	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
 
 /* d0 and d1 alternate as the high product word that is added, together
    with the extend flag, into the next limb's low product word.  */
 L(Loop:)
 	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d1):R(d3)
 	addxl	R(d0),R(d3)
 	movel	R(d3),MEM_POSTINC(res_ptr)
 L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d0):R(d3)
 	addxl	R(d1),R(d3)
 	movel	R(d3),MEM_POSTINC(res_ptr)
 
 	dbf	R(s1_size),L(Loop)
 /* dbf only counts the low 16 bits of s1_size.  Fold the pending carry into
    d0, then decrement the upper 16 bits and keep looping until that
    subtraction borrows.  */
 	clrl	R(d3)
 	addxl	R(d3),R(d0)
 	subl	#0x10000,R(s1_size)
 	bcc	L(Loop)
 
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d4)
 /* Disabled register-by-register equivalent of the moveml above.  */
 #if 0
 	movel	MEM_POSTINC(sp),R(d4)
 	movel	MEM_POSTINC(sp),R(d3)
 	movel	MEM_POSTINC(sp),R(d2)
 #endif
 	rts
 EPILOG(_gcry_mpih_mul_1)
 
 
diff --git a/mpi/m68k/mc68020/mpih-mul2.S b/mpi/m68k/mc68020/mpih-mul2.S
index 44baa8d8..321d9ac7 100644
--- a/mpi/m68k/mc68020/mpih-mul2.S
+++ b/mpi/m68k/mc68020/mpih-mul2.S
@@ -1,94 +1,94 @@
 /* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
  *                           the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1996, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (sp + 4)
  *		     mpi_ptr_t s1_ptr,	     (sp + 8)
  *		     mpi_size_t s1_size,     (sp + 12)
  *		     mpi_limb_t s2_limb)     (sp + 16)
  */
 
 
 /* _gcry_mpih_addmul_1: multiply the s1_size limbs at s1_ptr by s2_limb and
    add the products to the limbs at res_ptr in place.  The final carry limb
    is left in d0.  */
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
 PROLOG(_gcry_mpih_addmul_1)
 
 #define res_ptr a0
 #define s1_ptr a1
 #define s1_size d2
 #define s2_limb d4
 
 /* Save used registers on the stack.  */
 	moveml	R(d2)-R(d5),MEM_PREDEC(sp)
 
 /* Copy the arguments to registers.  Better use movem?	*/
 /* Four registers (16 bytes) were pushed, so the first argument is now
    found at sp + 20 instead of sp + 4.  */
 	movel	MEM_DISP(sp,20),R(res_ptr)
 	movel	MEM_DISP(sp,24),R(s1_ptr)
 	movel	MEM_DISP(sp,28),R(s1_size)
 	movel	MEM_DISP(sp,32),R(s2_limb)
 
 /* The loop below is unrolled twice.  Turn the limb count into a dbf counter
    and enter in the middle (L1) when the count is odd.  d5 is kept at zero
    and only serves as the addend for carry propagation with addxl.  */
 	eorw	#1,R(s1_size)
 	clrl	R(d1)
 	clrl	R(d5)
 	lsrl	#1,R(s1_size)
 	bcc	L(L1)
 	subql	#1,R(s1_size)
 	subl	R(d0),R(d0)		/* (d0,cy) <= (0,0) */
 
 /* Per limb: add the previous high word (and extend flag) to the low product
    word, propagate that carry into the new high word, then add the low word
    to memory; the carry of that last add is picked up by the next addxl.  */
 L(Loop:)
 	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d1):R(d3)
 	addxl	R(d0),R(d3)
 	addxl	R(d5),R(d1)
 	addl	R(d3),MEM_POSTINC(res_ptr)
 L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d0):R(d3)
 	addxl	R(d1),R(d3)
 	addxl	R(d5),R(d0)
 	addl	R(d3),MEM_POSTINC(res_ptr)
 
 	dbf	R(s1_size),L(Loop)
 /* dbf only counts the low 16 bits of s1_size.  Fold the pending carry into
    d0, then decrement the upper 16 bits and keep looping until that
    subtraction borrows.  */
 	addxl	R(d5),R(d0)
 	subl	#0x10000,R(s1_size)
 	bcc	L(Loop)
 
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
 
 	rts
 EPILOG(_gcry_mpih_addmul_1)
 
diff --git a/mpi/m68k/mc68020/mpih-mul3.S b/mpi/m68k/mc68020/mpih-mul3.S
index e958ef61..708fdc93 100644
--- a/mpi/m68k/mc68020/mpih-mul3.S
+++ b/mpi/m68k/mc68020/mpih-mul3.S
@@ -1,97 +1,97 @@
 /* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
  *                           the result from a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1996, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (sp + 4)
  *		     mpi_ptr_t s1_ptr,	     (sp + 8)
  *		     mpi_size_t s1_size,     (sp + 12)
  *		     mpi_limb_t s2_limb)     (sp + 16)
  */
 
 
 /* _gcry_mpih_submul_1: multiply the s1_size limbs at s1_ptr by s2_limb and
    subtract the products from the limbs at res_ptr in place.  The final
    borrow limb is left in d0.  */
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_submul_1)
 
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
 PROLOG(_gcry_mpih_submul_1)
 
 #define res_ptr a0
 #define s1_ptr a1
 #define s1_size d2
 #define s2_limb d4
 
 /* Save used registers on the stack.  */
 	moveml	R(d2)-R(d5),MEM_PREDEC(sp)
 
 /* Copy the arguments to registers.  Better use movem?	*/
 /* Four registers (16 bytes) were pushed, so the first argument is now
    found at sp + 20 instead of sp + 4.  */
 	movel	MEM_DISP(sp,20),R(res_ptr)
 	movel	MEM_DISP(sp,24),R(s1_ptr)
 	movel	MEM_DISP(sp,28),R(s1_size)
 	movel	MEM_DISP(sp,32),R(s2_limb)
 
 /* The loop below is unrolled twice.  Turn the limb count into a dbf counter
    and enter in the middle (L1) when the count is odd.  d5 is kept at zero
    and only serves as the addend for carry propagation with addxl.  */
 	eorw	#1,R(s1_size)
 	clrl	R(d1)
 	clrl	R(d5)
 	lsrl	#1,R(s1_size)
 	bcc	L(L1)
 	subql	#1,R(s1_size)
 	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
 
 /* Per limb: add the previous high word (and extend flag) to the low product
    word, propagate that carry into the new high word, then subtract the low
    word from memory; the borrow of that subtract is picked up by the next
    addxl.  */
 L(Loop:)
 	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d1):R(d3)
 	addxl	R(d0),R(d3)
 	addxl	R(d5),R(d1)
 	subl	R(d3),MEM_POSTINC(res_ptr)
 L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
 	mulul	R(s2_limb),R(d0):R(d3)
 	addxl	R(d1),R(d3)
 	addxl	R(d5),R(d0)
 	subl	R(d3),MEM_POSTINC(res_ptr)
 
 	dbf	R(s1_size),L(Loop)
 /* dbf only counts the low 16 bits of s1_size.  Fold the pending borrow into
    d0, then decrement the upper 16 bits and keep looping until that
    subtraction borrows.  */
 	addxl	R(d5),R(d0)
 	subl	#0x10000,R(s1_size)
 	bcc	L(Loop)
 
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
 
 	rts
 EPILOG(_gcry_mpih_submul_1)
 
 
diff --git a/mpi/m68k/mpih-add1.S b/mpi/m68k/mpih-add1.S
index 8182d21a..951735a3 100644
--- a/mpi/m68k/mpih-add1.S
+++ b/mpi/m68k/mpih-add1.S
@@ -1,92 +1,92 @@
 /* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
  *                        sum in a third limb vector.
  *
  *      Copyright (C) 1992, 1994,1996, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	(sp + 4)
  *		   mpi_ptr_t s1_ptr,	(sp + 8)
  *		   mpi_ptr_t s2_ptr,	(sp + 12)
  *		   mpi_size_t size)	(sp + 16)
  */
 
 
 /* _gcry_mpih_add_n: add the limb vectors read through a0 and a1 (second and
    third argument), store the sums through a2 (first argument) and return
    the final carry (0 or 1) in d0.  d2 holds the limb count.  */
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_add_n)
 
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
 PROLOG(_gcry_mpih_add_n)
 	/* Save used registers on the stack.  */
 	movel	R(d2),MEM_PREDEC(sp)
 	movel	R(a2),MEM_PREDEC(sp)
 
 	/* Copy the arguments to registers.  Better use movem?	*/
 	/* Two registers (8 bytes) were pushed, so the first argument is now
 	   found at sp + 12 instead of sp + 4.  */
 	movel	MEM_DISP(sp,12),R(a2)
 	movel	MEM_DISP(sp,16),R(a0)
 	movel	MEM_DISP(sp,20),R(a1)
 	movel	MEM_DISP(sp,24),R(d2)
 
 	/* The loop below is unrolled twice.  Turn the limb count into a dbf
 	   counter and enter in the middle (L1) when the count is odd.  */
 	eorw	#1,R(d2)
 	lsrl	#1,R(d2)
 	bcc	L(L1)
 	subql	#1,R(d2)	/* clears cy as side effect */
 
 	/* Each half adds one limb pair with carry-in from the extend flag.  */
 L(Loop:)
 	movel	MEM_POSTINC(a0),R(d0)
 	movel	MEM_POSTINC(a1),R(d1)
 	addxl	R(d1),R(d0)
 	movel	R(d0),MEM_POSTINC(a2)
 L(L1:)	movel	MEM_POSTINC(a0),R(d0)
 	movel	MEM_POSTINC(a1),R(d1)
 	addxl	R(d1),R(d0)
 	movel	R(d0),MEM_POSTINC(a2)
 
 	/* dbf only counts the low 16 bits of d2 ("%4" in the comment below
 	   refers to that counter).  The carry is parked in d0 while the
 	   upper 16 bits are decremented, and regenerated by the addl before
 	   the loop is re-entered.  */
 	dbf	R(d2),L(Loop)		/* loop until 16 lsb of %4 == -1 */
 	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
 	subl	#0x10000,R(d2)
 	bcs	L(L2)
 	addl	R(d0),R(d0)	/* restore cy */
 	bra	L(Loop)
 
 	/* Turn the saved 0 / -1 into the return value 0 / 1.  */
 L(L2:)
 	negl	R(d0)
 
 	/* Restore used registers from stack frame.  */
 	movel	MEM_POSTINC(sp),R(a2)
 	movel	MEM_POSTINC(sp),R(d2)
 
 	rts
 EPILOG(_gcry_mpih_add_n)
 
 
diff --git a/mpi/m68k/mpih-lshift.S b/mpi/m68k/mpih-lshift.S
index 133d1aae..540567c2 100644
--- a/mpi/m68k/mpih-lshift.S
+++ b/mpi/m68k/mpih-lshift.S
@@ -1,164 +1,164 @@
 /* mc68020 lshift -- Shift left a low-level natural-number integer.
  *
  *      Copyright (C) 1996, 1998, 2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(sp + 4)
  *		   mpi_ptr_t up,	(sp + 8)
  *		   mpi_size_t usize,	(sp + 12)
  *		   unsigned cnt)	(sp + 16)
  */
 
 /* _gcry_mpih_lshift: shift the s_size limbs at s_ptr left by cnt bits and
    store the result at res_ptr.  The bits shifted out of the most
    significant limb are returned in d0.
    NOTE(review): presumably requires s_size > 0 and 0 < cnt < 32; confirm
    against the callers.  */
 #define res_ptr a1
 #define s_ptr a0
 #define s_size d6
 #define cnt d4
 
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_lshift)
 
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
 PROLOG(_gcry_mpih_lshift)
 
 	/* Save used registers on the stack.  */
 	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
 
 	/* Copy the arguments to registers.  */
 	/* Six registers (24 bytes) were pushed, so the first argument is now
 	   found at sp + 28 instead of sp + 4.  */
 	movel	MEM_DISP(sp,28),R(res_ptr)
 	movel	MEM_DISP(sp,32),R(s_ptr)
 	movel	MEM_DISP(sp,36),R(s_size)
 	movel	MEM_DISP(sp,40),R(cnt)
 
 	/* A shift by exactly one bit can use the add-with-carry loop at
 	   Lspecial, which walks upwards from the least significant limb.
 	   That is only done when the pointer comparisons below show that
 	   walking upwards cannot overwrite source limbs before they are
 	   read.  */
 	moveql	#1,R(d5)
 	cmpl	R(d5),R(cnt)
 	bne	L(Lnormal)
 	cmpl	R(s_ptr),R(res_ptr)
 	bls	L(Lspecial)		/* jump if s_ptr >= res_ptr */
 #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
 	lea	MEM_INDX1(s_ptr,s_size,l,4),R(a2)
 #else /* not mc68020 */
 	movel	R(s_size),R(d0)
 	asll	#2,R(d0)
 	lea	MEM_INDX(s_ptr,d0,l),R(a2)
 #endif
 	cmpl	R(res_ptr),R(a2)
 	bls	L(Lspecial)		/* jump if res_ptr >= s_ptr + s_size */
 
 	/* General case: walk downwards from the most significant limb.
 	   d5 = 32 - cnt is the right-shift count that extracts the bits
 	   moving into the next higher limb.  */
 L(Lnormal:)
 	moveql	#32,R(d5)
 	subl	R(cnt),R(d5)
 
 	/* Advance both pointers past the last limb, i.e. by s_size * 4
 	   bytes.  Without scaled indexing the byte offset is computed in d0
 	   first; d0 (not the limb count) must be what is added.  */
 #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
 	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
 	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
 #else /* not mc68020 */
 	movel	R(s_size),R(d0)
 	asll	#2,R(d0)
 	addl	R(d0),R(s_ptr)
 	addl	R(d0),R(res_ptr)
 #endif
 	movel	MEM_PREDEC(s_ptr),R(d2)
 	movel	R(d2),R(d0)
 	lsrl	R(d5),R(d0)		/* compute carry limb */
 
 	/* Keep the shifted top limb pending in d1 (and d2).  The loop is
 	   unrolled twice; derive the dbf counter from the remaining limb
 	   count and enter in the middle (L1) when that count is odd.  */
 	lsll	R(cnt),R(d2)
 	movel	R(d2),R(d1)
 	subql	#1,R(s_size)
 	beq	L(Lend)
 	lsrl	#1,R(s_size)
 	bcs	L(L1)
 	subql	#1,R(s_size)
 
 	/* d1 and d2 alternate as the pending limb: the high bits of the next
 	   lower source limb are or-ed in, the limb is stored, and the next
 	   limb shifted by cnt becomes the new pending value.  */
 L(Loop:)
 	movel	MEM_PREDEC(s_ptr),R(d2)
 	movel	R(d2),R(d3)
 	lsrl	R(d5),R(d3)
 	orl	R(d3),R(d1)
 	movel	R(d1),MEM_PREDEC(res_ptr)
 	lsll	R(cnt),R(d2)
 L(L1:)
 	movel	MEM_PREDEC(s_ptr),R(d1)
 	movel	R(d1),R(d3)
 	lsrl	R(d5),R(d3)
 	orl	R(d3),R(d2)
 	movel	R(d2),MEM_PREDEC(res_ptr)
 	lsll	R(cnt),R(d1)
 
 	/* dbf only counts the low 16 bits of s_size; decrement the upper
 	   16 bits and keep looping until that subtraction borrows.  */
 	dbf	R(s_size),L(Loop)
 	subl	#0x10000,R(s_size)
 	bcc	L(Loop)
 
 L(Lend:)
 	movel	R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
 
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
 	rts
 
 /* Here we loop from the least significant end of the arrays.  That is only
    permissible when it cannot overwrite source limbs that are still to be
    read, because the function is documented to work for overlapping source
    and destination; the checks at the function entry ensure this.  */
 
 L(Lspecial:)
 	clrl	R(d0)			/* initialize carry */
 	eorw	#1,R(s_size)
 	lsrl	#1,R(s_size)
 	bcc	L(LL1)
 	subql	#1,R(s_size)
 
 	/* Each half doubles one limb with carry-in from the extend flag.  */
 L(LLoop:)
 	movel	MEM_POSTINC(s_ptr),R(d2)
 	addxl	R(d2),R(d2)
 	movel	R(d2),MEM_POSTINC(res_ptr)
 L(LL1:)
 	movel	MEM_POSTINC(s_ptr),R(d2)
 	addxl	R(d2),R(d2)
 	movel	R(d2),MEM_POSTINC(res_ptr)
 
 	/* dbf only counts the low 16 bits of s_size.  The carry is parked in
 	   d0 while the upper 16 bits are decremented; on exit d0 holds the
 	   bit shifted out of the top limb.  */
 	dbf	R(s_size),L(LLoop)
 	addxl	R(d0),R(d0)		/* save cy in lsb */
 	subl	#0x10000,R(s_size)
 	bcs	L(LLend)
 	lsrl	#1,R(d0)		/* restore cy */
 	bra	L(LLoop)
 
 L(LLend:)
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
 	rts
 EPILOG(_gcry_mpih_lshift)
 
 
 
 
 
diff --git a/mpi/m68k/mpih-rshift.S b/mpi/m68k/mpih-rshift.S
index be9f4350..964c141f 100644
--- a/mpi/m68k/mpih-rshift.S
+++ b/mpi/m68k/mpih-rshift.S
@@ -1,162 +1,162 @@
 /* mc68020 rshift -- Shift right a low-level natural-number integer.
  *
  *      Copyright (C) 1996, 1998, 2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	(sp + 4)
  *		   mpi_ptr_t up,	(sp + 8)
  *		   mpi_size_t usize,	(sp + 12)
  *		   unsigned cnt)	(sp + 16)
  */
 
 /* _gcry_mpih_rshift: shift the s_size limbs at s_ptr right by cnt bits and
    store the result at res_ptr.  The bits shifted out of the least
    significant limb are returned in the high end of d0.
    NOTE(review): presumably requires s_size > 0 and 0 < cnt < 32; confirm
    against the callers.  */
 #define res_ptr a1
 #define s_ptr a0
 #define s_size d6
 #define cnt d4
 
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_rshift)
 
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
 PROLOG(_gcry_mpih_rshift)
 	/* Save used registers on the stack.  */
 	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
 
 	/* Copy the arguments to registers.  */
 	/* Six registers (24 bytes) were pushed, so the first argument is now
 	   found at sp + 28 instead of sp + 4.  */
 	movel	MEM_DISP(sp,28),R(res_ptr)
 	movel	MEM_DISP(sp,32),R(s_ptr)
 	movel	MEM_DISP(sp,36),R(s_size)
 	movel	MEM_DISP(sp,40),R(cnt)
 
 	/* A shift by exactly one bit can use the rotate-through-extend loop
 	   at Rspecial, which walks downwards from the most significant limb.
 	   That is only done when the pointer comparisons below show that
 	   walking downwards cannot overwrite source limbs before they are
 	   read.  */
 	moveql	#1,R(d5)
 	cmpl	R(d5),R(cnt)
 	bne	L(Rnormal)
 	cmpl	R(res_ptr),R(s_ptr)
 	bls	L(Rspecial)		/* jump if res_ptr >= s_ptr */
 #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
 	lea	MEM_INDX1(res_ptr,s_size,l,4),R(a2)
 #else /* not mc68020 */
 	movel	R(s_size),R(d0)
 	asll	#2,R(d0)
 	lea	MEM_INDX(res_ptr,d0,l),R(a2)
 #endif
 	cmpl	R(s_ptr),R(a2)
 	bls	L(Rspecial)		/* jump if s_ptr >= res_ptr + s_size */
 
 	/* General case: walk upwards from the least significant limb.
 	   d5 = 32 - cnt is the left-shift count that extracts the bits
 	   moving into the next lower limb.  */
 L(Rnormal:)
 	moveql	#32,R(d5)
 	subl	R(cnt),R(d5)
 	movel	MEM_POSTINC(s_ptr),R(d2)
 	movel	R(d2),R(d0)
 	lsll	R(d5),R(d0)		/* compute carry limb */
 
 	/* Keep the shifted lowest limb pending in d1 (and d2).  The loop is
 	   unrolled twice; derive the dbf counter from the remaining limb
 	   count and enter in the middle (R1) when that count is odd.  */
 	lsrl	R(cnt),R(d2)
 	movel	R(d2),R(d1)
 	subql	#1,R(s_size)
 	beq	L(Rend)
 	lsrl	#1,R(s_size)
 	bcs	L(R1)
 	subql	#1,R(s_size)
 
 	/* d1 and d2 alternate as the pending limb: the low bits of the next
 	   higher source limb are or-ed in, the limb is stored, and the next
 	   limb shifted by cnt becomes the new pending value.  */
 L(Roop:)
 	movel	MEM_POSTINC(s_ptr),R(d2)
 	movel	R(d2),R(d3)
 	lsll	R(d5),R(d3)
 	orl	R(d3),R(d1)
 	movel	R(d1),MEM_POSTINC(res_ptr)
 	lsrl	R(cnt),R(d2)
 L(R1:)
 	movel	MEM_POSTINC(s_ptr),R(d1)
 	movel	R(d1),R(d3)
 	lsll	R(d5),R(d3)
 	orl	R(d3),R(d2)
 	movel	R(d2),MEM_POSTINC(res_ptr)
 	lsrl	R(cnt),R(d1)
 
 	/* dbf only counts the low 16 bits of s_size; decrement the upper
 	   16 bits and keep looping until that subtraction borrows.  */
 	dbf	R(s_size),L(Roop)
 	subl	#0x10000,R(s_size)
 	bcc	L(Roop)
 
 L(Rend:)
 	movel	R(d1),MEM(res_ptr) /* store most significant limb */
 
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
 	rts
 
 /* Here we loop from the most significant end of the arrays.  That is only
    permissible when it cannot overwrite source limbs that are still to be
    read, because the function is documented to work for overlapping source
    and destination; the checks at the function entry ensure this.  */
 
 L(Rspecial:)
 	/* Advance both pointers past the last limb, i.e. by s_size * 4
 	   bytes.  Without scaled indexing the byte offset is computed in d0
 	   first; d0 (not the limb count) must be what is added.  */
 #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
 	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
 	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
 #else /* not mc68020 */
 	movel	R(s_size),R(d0)
 	asll	#2,R(d0)
 	addl	R(d0),R(s_ptr)
 	addl	R(d0),R(res_ptr)
 #endif
 
 	clrl	R(d0)			/* initialize carry */
 	eorw	#1,R(s_size)
 	lsrl	#1,R(s_size)
 	bcc	L(LR1)
 	subql	#1,R(s_size)
 
 	/* Each half rotates one limb right by one bit through the extend
 	   flag.  */
 L(LRoop:)
 	movel	MEM_PREDEC(s_ptr),R(d2)
 	roxrl	#1,R(d2)
 	movel	R(d2),MEM_PREDEC(res_ptr)
 L(LR1:)
 	movel	MEM_PREDEC(s_ptr),R(d2)
 	roxrl	#1,R(d2)
 	movel	R(d2),MEM_PREDEC(res_ptr)
 
 	/* dbf only counts the low 16 bits of s_size.  The carry is parked in
 	   the top bit of d0 while the upper 16 bits are decremented; on exit
 	   d0 holds the bit shifted out of the lowest limb in its top bit.  */
 	dbf	R(s_size),L(LRoop)
 	roxrl	#1,R(d0)		/* save cy in msb */
 	subl	#0x10000,R(s_size)
 	bcs	L(LRend)
 	addl	R(d0),R(d0)		/* restore cy */
 	bra	L(LRoop)
 
 L(LRend:)
 /* Restore used registers from stack frame.  */
 	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
 	rts
 EPILOG(_gcry_mpih_rshift)
 
 
 
 
diff --git a/mpi/m68k/mpih-sub1.S b/mpi/m68k/mpih-sub1.S
index ee7555f8..f81569ce 100644
--- a/mpi/m68k/mpih-sub1.S
+++ b/mpi/m68k/mpih-sub1.S
@@ -1,91 +1,91 @@
 /* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
  *                        store difference in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1996, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(sp + 4)
  *		   mpi_ptr_t s1_ptr,	(sp + 8)
  *		   mpi_ptr_t s2_ptr,	(sp + 12)
  *		   mpi_size_t size)	(sp + 16)
  */
 
 
 	TEXT
 	ALIGN
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_sub_n)
 
 /* Entry point.  Subtracts the limb vector read through a1 from the one
    read through a0, limb by limb with borrow propagation, and stores each
    difference through a2.  d2 holds the limb count.  The final borrow is
    returned in d0 as 0 or 1.  */
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 PROLOG(_gcry_mpih_sub_n)
 /* Save used registers on the stack.  */
 	movel	R(d2),MEM_PREDEC(sp)
 	movel	R(a2),MEM_PREDEC(sp)
 
 /* Copy the arguments to registers.  The displacements include the 8
    bytes pushed just above: a2 = destination pointer, a0 = minuend
    pointer, a1 = subtrahend pointer, d2 = limb count.
    Better use movem?	*/
 	movel	MEM_DISP(sp,12),R(a2)
 	movel	MEM_DISP(sp,16),R(a0)
 	movel	MEM_DISP(sp,20),R(a1)
 	movel	MEM_DISP(sp,24),R(d2)
 
 /* The loop body handles two limbs per pass.  Flip the low bit of the
    count and shift it out: when the shifted-out bit is clear (odd count)
    enter in the middle of the body at L1, otherwise adjust the pass
    counter and start at the top.  */
 	eorw	#1,R(d2)
 	lsrl	#1,R(d2)
 	bcc	L(L1)
 	subql	#1,R(d2)	/* clears cy as side effect */
 
 L(Loop:)
 	movel	MEM_POSTINC(a0),R(d0)
 	movel	MEM_POSTINC(a1),R(d1)
 	subxl	R(d1),R(d0)	/* d0 = d0 - d1 - borrow */
 	movel	R(d0),MEM_POSTINC(a2)
 L(L1:)	movel	MEM_POSTINC(a0),R(d0)
 	movel	MEM_POSTINC(a1),R(d1)
 	subxl	R(d1),R(d0)	/* d0 = d0 - d1 - borrow */
 	movel	R(d0),MEM_POSTINC(a2)
 
 	dbf	R(d2),L(Loop)		/* loop until low 16 bits of d2 == -1 */
 /* dbf only counts in the low word, so the upper word of the pass counter
    is stepped by hand below; the borrow is parked in d0 while doing so.  */
 	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
 	subl	#0x10000,R(d2)
 	bcs	L(L2)		/* upper word exhausted as well: done */
 	addl	R(d0),R(d0)	/* restore cy */
 	bra	L(Loop)
 
 L(L2:)
 	negl	R(d0)		/* turn the saved 0 / -1 into a 0 / 1 return value */
 
 /* Restore used registers from stack frame.  */
 	movel	MEM_POSTINC(sp),R(a2)
 	movel	MEM_POSTINC(sp),R(d2)
 
 	rts
 EPILOG(_gcry_mpih_sub_n)
 
 
diff --git a/mpi/m68k/syntax.h b/mpi/m68k/syntax.h
index e27de98b..1c528db7 100644
--- a/mpi/m68k/syntax.h
+++ b/mpi/m68k/syntax.h
@@ -1,185 +1,185 @@
 /* syntax.h -- Definitions for 68k syntax variations.
  *
  *      Copyright (C) 1992, 1994, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 /* Drop any earlier ALIGN definition; each syntax section below that needs
    one defines its own.  */
 #undef ALIGN
 
 /* MIT syntax: registers are written bare, memory operands use the
    base@, base@(disp), base@- and base@+ forms, and labels carry no
    prefix.  */
 #ifdef MIT_SYNTAX
 #define PROLOG(name)
 #define EPILOG(name)
 #define R(r)r
 #define MEM(base)base@
 #define MEM_DISP(base,displacement)base@(displacement)
 #define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
 #define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
 #define MEM_PREDEC(memory_base)memory_base@-
 #define MEM_POSTINC(memory_base)memory_base@+
 #define L(label) label
 #define TEXT .text
 #define ALIGN .even
 #define GLOBL .globl
 #define moveql moveq
 /* Use variable sized opcodes: the b* branch mnemonics used in the
    sources are rewritten to the corresponding j* spellings.  */
 #define bcc jcc
 #define bcs jcs
 #define bls jls
 #define beq jeq
 #define bne jne
 #define bra jra
 #endif
 
 /* Sony syntax: registers are written bare, memory operands use the
    parenthesised (base), (disp,base), -(base) and (base)+ forms, and
    labels carry no prefix.  The dotted mnemonic spellings (move.l etc.)
    come from the section shared with ELF_SYNTAX further down.  */
 #ifdef SONY_SYNTAX
 #define PROLOG(name)
 #define EPILOG(name)
 #define R(r)r
 #define MEM(base)(base)
 #define MEM_DISP(base,displacement)(displacement,base)
 #define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
 #define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
 #define MEM_PREDEC(memory_base)-(memory_base)
 #define MEM_POSTINC(memory_base)(memory_base)+
 #define L(label) label
 #define TEXT .text
 #define ALIGN .even
 #define GLOBL .globl
 #endif
 
 /* Motorola syntax: same operand forms as the Sony section, but TEXT and
    ALIGN expand to nothing, GLOBL becomes XDEF, and every mnemonic and
    register name used by the sources is mapped to its upper-case
    (and, for sized opcodes, dotted) spelling.  */
 #ifdef MOTOROLA_SYNTAX
 #define PROLOG(name)
 #define EPILOG(name)
 #define R(r)r
 #define MEM(base)(base)
 #define MEM_DISP(base,displacement)(displacement,base)
 #define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
 #define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
 #define MEM_PREDEC(memory_base)-(memory_base)
 #define MEM_POSTINC(memory_base)(memory_base)+
 #define L(label) label
 #define TEXT
 #define ALIGN
 #define GLOBL XDEF
 /* Opcode spellings.  */
 #define lea LEA
 #define movel MOVE.L
 #define moveml MOVEM.L
 #define moveql MOVEQ.L
 #define cmpl CMP.L
 #define orl OR.L
 #define clrl CLR.L
 #define eorw EOR.W
 #define lsrl LSR.L
 #define lsll LSL.L
 #define roxrl ROXR.L
 #define roxll ROXL.L
 #define addl ADD.L
 #define addxl ADDX.L
 #define addql ADDQ.L
 #define subl SUB.L
 #define subxl SUBX.L
 #define subql SUBQ.L
 #define negl NEG.L
 #define mulul MULU.L
 #define bcc BCC
 #define bcs BCS
 #define bls BLS
 #define beq BEQ
 #define bne BNE
 #define bra BRA
 #define dbf DBF
 #define rts RTS
 /* Register spellings.  */
 #define d0 D0
 #define d1 D1
 #define d2 D2
 #define d3 D3
 #define d4 D4
 #define d5 D5
 #define d6 D6
 #define d7 D7
 #define a0 A0
 #define a1 A1
 #define a2 A2
 #define a3 A3
 #define a4 A4
 #define a5 A5
 #define a6 A6
 #define a7 A7
 #define sp SP
 #endif
 
 /* ELF syntax: PROLOG/EPILOG emit .type/.size directives for the symbol,
    registers get a '%' prefix through R(), and local labels get a leading
    dot through L().  */
 #ifdef ELF_SYNTAX
 #define PROLOG(name) .type name,@function
 #define EPILOG(name) .size name,.-name
 #define MEM(base)(R(base))
 #define MEM_DISP(base,displacement)(displacement,R(base))
 #define MEM_PREDEC(memory_base)-(R(memory_base))
 #define MEM_POSTINC(memory_base)(R(memory_base))+
 /* Two ways of gluing the prefix onto the name: the ## operator when
    __STDC__ is defined (with an extra macro level so the arguments are
    expanded first), otherwise an empty comment between the tokens.  */
 #ifdef __STDC__
 #define R_(r)%##r
 #define R(r)R_(r)
 #define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
 #define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
 #define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
 #define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
 #define L(label) .##label
 #else
 #define R(r)%/**/r
 #define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
 #define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
 #define L(label) ./**/label
 #endif
 #define TEXT .text
 #define ALIGN .align 2
 #define GLOBL .globl
 /* Branch mnemonics used in the sources are rewritten to the jb*
    spellings.  */
 #define bcc jbcc
 #define bcs jbcs
 #define bls jbls
 #define beq jbeq
 #define bne jbne
 #define bra jbra
 #endif
 
 /* Mnemonic spellings shared by the Sony and ELF syntaxes: the suffixed
    names used in the sources (movel, addl, ...) become the dotted
    lower-case forms (move.l, add.l, ...).  */
 #if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
 #define movel move.l
 #define moveml movem.l
 #define moveql moveq.l
 #define cmpl cmp.l
 #define orl or.l
 #define clrl clr.l
 #define eorw eor.w
 #define lsrl lsr.l
 #define lsll lsl.l
 #define roxrl roxr.l
 #define roxll roxl.l
 #define addl add.l
 #define addxl addx.l
 #define addql addq.l
 #define subl sub.l
 #define subxl subx.l
 #define subql subq.l
 #define negl neg.l
 #define mulul mulu.l
 #endif
diff --git a/mpi/mips3/mpih-add1.S b/mpi/mips3/mpih-add1.S
index f3db029d..ba5ac195 100644
--- a/mpi/mips3/mpih-add1.S
+++ b/mpi/mips3/mpih-add1.S
@@ -1,124 +1,124 @@
 /* mips3  add_n -- Add two limb vectors of the same length > 0 and store
  *		   sum in a third limb vector.
  *
  *      Copyright (C) 1995, 1998, 2000
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	($4)
  *		   mpi_ptr_t s1_ptr,	($5)
  *		   mpi_ptr_t s2_ptr,	($6)
  *		   mpi_size_t size)	($7)
  */
 
 	.text
 	.align	2
 	.globl	_gcry_mpih_add_n
 	.ent	_gcry_mpih_add_n
 /* Adds the limb vectors at $5 and $6 ($7 limbs each) and stores the sum
    at $4.  $2 carries the carry between limbs and holds the final carry
    on return.  Assembled with noreorder, so the instruction after each
    branch or jump is its delay slot.  */
 _gcry_mpih_add_n:
 	.set	noreorder
 	.set	nomacro
 
 	ld	$10,0($5)	# $10 = first s1 limb
 	ld	$11,0($6)	# $11 = first s2 limb
 
 	daddiu	$7,$7,-1	# one limb is always left for .Lend
 	and	$9,$7,4-1	# number of limbs in first loop
 	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
 	 move	$2,$0		# delay slot: clear the running carry
 
 	dsubu	$7,$7,$9	# limbs left for the 4-limb loop
 
 /* First loop: one limb per pass, $9 passes.  The next pair of limbs is
    loaded into $12/$13 while the current pair is being added.  */
 .Loop0: daddiu	$9,$9,-1
 	ld	$12,8($5)
 	daddu	$11,$11,$2	# s2 limb + carry in
 	ld	$13,8($6)
 	sltu	$8,$11,$2	# $8 = carry out of adding the carry
 	daddu	$11,$10,$11	# + s1 limb
 	sltu	$2,$11,$10	# $2 = carry out of the limb addition
 	sd	$11,0($4)
 	or	$2,$2,$8	# combined carry for the next limb
 
 	daddiu	$5,$5,8
 	daddiu	$6,$6,8
 	move	$10,$12
 	move	$11,$13
 	bne	$9,$0,.Loop0
 	 daddiu $4,$4,8
 
 .L0:	beq	$7,$0,.Lend
 	 nop
 
 /* Main loop: four limbs per pass, alternating between the register
    pairs $10/$11 and $12/$13 for the current and the preloaded limbs.  */
 .Loop:	daddiu	$7,$7,-4
 
 	ld	$12,8($5)
 	daddu	$11,$11,$2
 	ld	$13,8($6)
 	sltu	$8,$11,$2
 	daddu	$11,$10,$11
 	sltu	$2,$11,$10
 	sd	$11,0($4)
 	or	$2,$2,$8
 
 	ld	$10,16($5)
 	daddu	$13,$13,$2
 	ld	$11,16($6)
 	sltu	$8,$13,$2
 	daddu	$13,$12,$13
 	sltu	$2,$13,$12
 	sd	$13,8($4)
 	or	$2,$2,$8
 
 	ld	$12,24($5)
 	daddu	$11,$11,$2
 	ld	$13,24($6)
 	sltu	$8,$11,$2
 	daddu	$11,$10,$11
 	sltu	$2,$11,$10
 	sd	$11,16($4)
 	or	$2,$2,$8
 
 	ld	$10,32($5)
 	daddu	$13,$13,$2
 	ld	$11,32($6)
 	sltu	$8,$13,$2
 	daddu	$13,$12,$13
 	sltu	$2,$13,$12
 	sd	$13,24($4)
 	or	$2,$2,$8
 
 	daddiu	$5,$5,32
 	daddiu	$6,$6,32
 
 	bne	$7,$0,.Loop
 	 daddiu $4,$4,32
 
 /* Last limb: already loaded in $10/$11.  The final carry is formed in
    the delay slot of the return jump.  */
 .Lend:	daddu	$11,$11,$2
 	sltu	$8,$11,$2
 	daddu	$11,$10,$11
 	sltu	$2,$11,$10
 	sd	$11,0($4)
 	j	$31
 	or	$2,$2,$8
 
 	.end	_gcry_mpih_add_n
 
diff --git a/mpi/mips3/mpih-lshift.S b/mpi/mips3/mpih-lshift.S
index 084c109b..013ee4a7 100644
--- a/mpi/mips3/mpih-lshift.S
+++ b/mpi/mips3/mpih-lshift.S
@@ -1,97 +1,97 @@
 /* mips3    lshift
  *
  *      Copyright (C) 1995, 1998, 2000,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	($4)
  *		   mpi_ptr_t up,	($5)
  *		   mpi_size_t usize,	($6)
  *		   unsigned cnt)	($7)
  */
 
 	.text
 	.align	2
 	.globl	_gcry_mpih_lshift
 	.ent	_gcry_mpih_lshift
 /* Shifts the $6-limb vector at $5 left by $7 bits and stores it at $4,
    working from the highest limb down to the lowest.  $2 receives the
    bits shifted out of the highest limb.  Assembled with noreorder, so
    the instruction after each branch or jump is its delay slot.  */
 _gcry_mpih_lshift:
 	.set	noreorder
 	.set	nomacro
 
 	dsll	$2,$6,3		# $2 = usize * 8, the vector length in bytes
 	daddu	$5,$5,$2	# make r5 point at end of src
 	ld	$10,-8($5)	# load first limb
 	dsubu	$13,$0,$7	# $13 = -cnt, shift count for the opposite direction
 	daddu	$4,$4,$2	# make r4 point at end of res
 	daddiu	$6,$6,-1	# one limb is always left for .Lend
 	and	$9,$6,4-1	# number of limbs in first loop
 	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
 	 dsrl	$2,$10,$13	# compute function result
 
 	dsubu	$6,$6,$9	# limbs left for the 4-limb loop
 
 /* First loop: one result limb per pass, built from the current limb
    shifted left and the next lower limb shifted right.  */
 .Loop0: ld	$3,-16($5)
 	daddiu	$4,$4,-8
 	daddiu	$5,$5,-8
 	daddiu	$9,$9,-1
 	dsll	$11,$10,$7
 	dsrl	$12,$3,$13
 	move	$10,$3
 	or	$8,$11,$12
 	bne	$9,$0,.Loop0
 	 sd	$8,0($4)
 
 .L0:	beq	$6,$0,.Lend
 	 nop
 
 /* Main loop: four result limbs per pass.  */
 .Loop:	ld	$3,-16($5)
 	daddiu	$4,$4,-32
 	daddiu	$6,$6,-4
 	dsll	$11,$10,$7
 	dsrl	$12,$3,$13
 
 	ld	$10,-24($5)
 	dsll	$14,$3,$7
 	or	$8,$11,$12
 	sd	$8,24($4)
 	dsrl	$9,$10,$13
 
 	ld	$3,-32($5)
 	dsll	$11,$10,$7
 	or	$8,$14,$9
 	sd	$8,16($4)
 	dsrl	$12,$3,$13
 
 	ld	$10,-40($5)
 	dsll	$14,$3,$7
 	or	$8,$11,$12
 	sd	$8,8($4)
 	dsrl	$9,$10,$13
 
 	daddiu	$5,$5,-32
 	or	$8,$14,$9
 	bgtz	$6,.Loop
 	 sd	$8,0($4)
 
 /* Lowest limb: only the left-shifted part exists; it is stored from the
    delay slot of the return jump.  */
 .Lend:	dsll	$8,$10,$7
 	j	$31
 	sd	$8,-8($4)
 	.end	_gcry_mpih_lshift
diff --git a/mpi/mips3/mpih-mul1.S b/mpi/mips3/mpih-mul1.S
index 6c0099de..b2a6d161 100644
--- a/mpi/mips3/mpih-mul1.S
+++ b/mpi/mips3/mpih-mul1.S
@@ -1,89 +1,89 @@
 /* mips3 mpih-mul1.S -- Multiply a limb vector with a limb and store
  *			the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 2000
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(r4)
  *		  mpi_ptr_t s1_ptr,	(r5)
  *		  mpi_size_t s1_size,	(r6)
  *		  mpi_limb_t s2_limb)	(r7)
  */
 
 	.text
 	.align	4
 	.globl	_gcry_mpih_mul_1
 	.ent	_gcry_mpih_mul_1
 /* Multiplies the $6-limb vector at $5 by the limb in $7 and stores the
    low product limbs at $4.  $2 holds the carry limb between steps and on
    return.  The multiply for the next limb is started before the result
    of the current one is stored.  Assembled with noreorder, so the
    instruction after each branch or jump is its delay slot.  */
 _gcry_mpih_mul_1:
 	.set	noreorder
 	.set	nomacro
 
 /* # warm up phase 0 */
 	ld	$8,0($5)
 
 /* # warm up phase 1 */
 	daddiu	$5,$5,8
 	dmultu	$8,$7
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC0	# single limb: only the final step is needed
 	 move	$2,$0		# zero cy2
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC1	# two limbs: skip the loop
 	ld	$8,0($5)	# load new s1 limb as early as possible
 
 Loop:	mflo	$10
 	mfhi	$9
 	daddiu	$5,$5,8
 	daddu	$10,$10,$2	# add old carry limb to low product limb
 	dmultu	$8,$7
 	ld	$8,0($5)	# load new s1 limb as early as possible
 	daddiu	$6,$6,-1	# decrement loop counter
 	sltu	$2,$10,$2	# carry from previous addition -> $2
 	sd	$10,0($4)
 	daddiu	$4,$4,8
 	bne	$6,$0,Loop
 	 daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 1 */
 $LC1:	mflo	$10
 	mfhi	$9
 	daddu	$10,$10,$2
 	sltu	$2,$10,$2
 	dmultu	$8,$7
 	sd	$10,0($4)
 	daddiu	$4,$4,8
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 0 */
 $LC0:	mflo	$10
 	mfhi	$9
 	daddu	$10,$10,$2
 	sltu	$2,$10,$2
 	sd	$10,0($4)
 	j	$31
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 	.end	_gcry_mpih_mul_1
 
diff --git a/mpi/mips3/mpih-mul2.S b/mpi/mips3/mpih-mul2.S
index ca827638..49ccce21 100644
--- a/mpi/mips3/mpih-mul2.S
+++ b/mpi/mips3/mpih-mul2.S
@@ -1,101 +1,101 @@
 /* MIPS3   addmul_1 -- Multiply a limb vector with a single limb and
  *		       add the product to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 2000
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (r4)
  *		     mpi_ptr_t s1_ptr,	     (r5)
  *		     mpi_size_t s1_size,     (r6)
  *		     mpi_limb_t s2_limb)     (r7)
  */
 
 	.text
 	.align	4
 	.globl	_gcry_mpih_addmul_1
 	.ent	_gcry_mpih_addmul_1
 /* Multiplies the $6-limb vector at $5 by the limb in $7 and adds the
    product into the vector at $4.  $2 holds the carry limb between steps
    and on return.  The multiply for the next limb is started before the
    result of the current one is stored.  Assembled with noreorder, so the
    instruction after each branch or jump is its delay slot.  */
 _gcry_mpih_addmul_1:
 	.set	noreorder
 	.set	nomacro
 
 /* # warm up phase 0 */
 	ld	$8,0($5)
 
 /* # warm up phase 1 */
 	daddiu	$5,$5,8
 	dmultu	$8,$7
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC0	# single limb: only the final step is needed
 	 move	$2,$0		# zero cy2
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC1	# two limbs: skip the loop
 	ld	$8,0($5)	# load new s1 limb as early as possible
 
 Loop:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddiu	$5,$5,8
 	daddu	$3,$3,$2	# add old carry limb to low product limb
 	dmultu	$8,$7
 	ld	$8,0($5)	# load new s1 limb as early as possible
 	daddiu	$6,$6,-1	# decrement loop counter
 	sltu	$2,$3,$2	# carry from previous addition -> $2
 	daddu	$3,$10,$3	# add the existing result limb
 	sltu	$10,$3,$10	# carry out of that addition
 	daddu	$2,$2,$10	# accumulate both carries
 	sd	$3,0($4)
 	daddiu	$4,$4,8
 	bne	$6,$0,Loop
 	 daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 1 */
 $LC1:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddu	$3,$3,$2
 	sltu	$2,$3,$2
 	dmultu	$8,$7
 	daddu	$3,$10,$3
 	sltu	$10,$3,$10
 	daddu	$2,$2,$10
 	sd	$3,0($4)
 	daddiu	$4,$4,8
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 0 */
 $LC0:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddu	$3,$3,$2
 	sltu	$2,$3,$2
 	daddu	$3,$10,$3
 	sltu	$10,$3,$10
 	daddu	$2,$2,$10
 	sd	$3,0($4)
 	j	$31
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 	.end	_gcry_mpih_addmul_1
 
diff --git a/mpi/mips3/mpih-mul3.S b/mpi/mips3/mpih-mul3.S
index be421a68..a951ba69 100644
--- a/mpi/mips3/mpih-mul3.S
+++ b/mpi/mips3/mpih-mul3.S
@@ -1,101 +1,101 @@
 /* MIPS3 submul_1 -- Multiply a limb vector with a single limb and
  *		     subtract the product from a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998, 2000
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (r4)
  *		     mpi_ptr_t s1_ptr,	     (r5)
  *		     mpi_size_t s1_size,     (r6)
  *		     mpi_limb_t s2_limb)     (r7)
  */
 
 	.text
 	.align	4
 	.globl	_gcry_mpih_submul_1
 	.ent	_gcry_mpih_submul_1
 /* Multiplies the $6-limb vector at $5 by the limb in $7 and subtracts
    the product from the vector at $4.  $2 holds the carry/borrow limb
    between steps and on return.  The multiply for the next limb is
    started before the result of the current one is stored.  Assembled
    with noreorder, so the instruction after each branch or jump is its
    delay slot.  */
 _gcry_mpih_submul_1:
 	.set	noreorder
 	.set	nomacro
 
 /* # warm up phase 0 */
 	ld	$8,0($5)
 
 /* # warm up phase 1 */
 	daddiu	$5,$5,8
 	dmultu	$8,$7
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC0	# single limb: only the final step is needed
 	 move	$2,$0		# zero cy2
 
 	daddiu	$6,$6,-1
 	beq	$6,$0,$LC1	# two limbs: skip the loop
 	ld	$8,0($5)	# load new s1 limb as early as possible
 
 Loop:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddiu	$5,$5,8
 	daddu	$3,$3,$2	# add old carry limb to low product limb
 	dmultu	$8,$7
 	ld	$8,0($5)	# load new s1 limb as early as possible
 	daddiu	$6,$6,-1	# decrement loop counter
 	sltu	$2,$3,$2	# carry from previous addition -> $2
 	dsubu	$3,$10,$3	# existing result limb minus low product limb
 	sgtu	$10,$3,$10	# borrow out of that subtraction
 	daddu	$2,$2,$10	# accumulate carry and borrow
 	sd	$3,0($4)
 	daddiu	$4,$4,8
 	bne	$6,$0,Loop
 	 daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 1 */
 $LC1:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddu	$3,$3,$2
 	sltu	$2,$3,$2
 	dmultu	$8,$7
 	dsubu	$3,$10,$3
 	sgtu	$10,$3,$10
 	daddu	$2,$2,$10
 	sd	$3,0($4)
 	daddiu	$4,$4,8
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 /* # cool down phase 0 */
 $LC0:	ld	$10,0($4)
 	mflo	$3
 	mfhi	$9
 	daddu	$3,$3,$2
 	sltu	$2,$3,$2
 	dsubu	$3,$10,$3
 	sgtu	$10,$3,$10
 	daddu	$2,$2,$10
 	sd	$3,0($4)
 	j	$31
 	daddu	$2,$9,$2	# add high product limb and carry from addition
 
 	.end	_gcry_mpih_submul_1
 
diff --git a/mpi/mips3/mpih-rshift.S b/mpi/mips3/mpih-rshift.S
index e7e035a0..a8d39854 100644
--- a/mpi/mips3/mpih-rshift.S
+++ b/mpi/mips3/mpih-rshift.S
@@ -1,95 +1,95 @@
 /* mips3    rshift
  *
  *      Copyright (C) 1995, 1998, 2000
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	($4)
  *		   mpi_ptr_t up,	($5)
  *		   mpi_size_t usize,	($6)
  *		   unsigned cnt)	($7)
  */
 
 	.text
 	.align	2
 	.globl	_gcry_mpih_rshift
 	.ent	_gcry_mpih_rshift
 /* Shifts the $6-limb vector at $5 right by $7 bits and stores it at $4,
    working from the lowest limb up to the highest.  $2 receives the bits
    shifted out of the lowest limb.  Assembled with noreorder, so the
    instruction after each branch or jump is its delay slot.  */
 _gcry_mpih_rshift:
 	.set	noreorder
 	.set	nomacro
 
 	ld	$10,0($5)	# load first limb
 	dsubu	$13,$0,$7	# $13 = -cnt, shift count for the opposite direction
 	daddiu	$6,$6,-1	# one limb is always left for .Lend
 	and	$9,$6,4-1	# number of limbs in first loop
 	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
 	 dsll	$2,$10,$13	# compute function result
 
 	dsubu	$6,$6,$9	# limbs left for the 4-limb loop
 
 /* First loop: one result limb per pass, built from the current limb
    shifted right and the next higher limb shifted left.  */
 .Loop0: ld	$3,8($5)
 	daddiu	$4,$4,8
 	daddiu	$5,$5,8
 	daddiu	$9,$9,-1
 	dsrl	$11,$10,$7
 	dsll	$12,$3,$13
 	move	$10,$3
 	or	$8,$11,$12
 	bne	$9,$0,.Loop0
 	 sd	$8,-8($4)
 
 .L0:	beq	$6,$0,.Lend
 	 nop
 
 /* Main loop: four result limbs per pass.  */
 .Loop:	ld	$3,8($5)
 	daddiu	$4,$4,32
 	daddiu	$6,$6,-4
 	dsrl	$11,$10,$7
 	dsll	$12,$3,$13
 
 	ld	$10,16($5)
 	dsrl	$14,$3,$7
 	or	$8,$11,$12
 	sd	$8,-32($4)
 	dsll	$9,$10,$13
 
 	ld	$3,24($5)
 	dsrl	$11,$10,$7
 	or	$8,$14,$9
 	sd	$8,-24($4)
 	dsll	$12,$3,$13
 
 	ld	$10,32($5)
 	dsrl	$14,$3,$7
 	or	$8,$11,$12
 	sd	$8,-16($4)
 	dsll	$9,$10,$13
 
 	daddiu	$5,$5,32
 	or	$8,$14,$9
 	bgtz	$6,.Loop
 	 sd	$8,-8($4)
 
 /* Highest limb: only the right-shifted part exists; it is stored from
    the delay slot of the return jump.  */
 .Lend:	dsrl	$8,$10,$7
 	j	$31
 	sd	$8,0($4)
 	.end	_gcry_mpih_rshift
 
diff --git a/mpi/mips3/mpih-sub1.S b/mpi/mips3/mpih-sub1.S
index 9fac6743..ae444b17 100644
--- a/mpi/mips3/mpih-sub1.S
+++ b/mpi/mips3/mpih-sub1.S
@@ -1,125 +1,125 @@
 /* mips3  sub_n -- Subtract two limb vectors of the same length > 0 and
  *		  store difference in a third limb vector.
  * 
  *      Copyright (C) 1995, 1998, 1999, 2000,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(r4)
  *		   mpi_ptr_t s1_ptr,	(r5)
  *		   mpi_ptr_t s2_ptr,	(r6)
  *		   mpi_size_t size)	(r7)
  */
 
 
 	.text
 	.align	2
 	.globl	_gcry_mpih_sub_n
 	.ent	_gcry_mpih_sub_n
 /* Subtracts the limb vector at $6 from the one at $5 ($7 limbs each) and
    stores the difference at $4.  $2 carries the borrow between limbs and
    holds the final borrow on return.  Assembled with noreorder, so the
    instruction after each branch or jump is its delay slot.  */
 _gcry_mpih_sub_n:
 	.set	noreorder
 	.set	nomacro
 
 	ld	$10,0($5)	# $10 = first s1 limb
 	ld	$11,0($6)	# $11 = first s2 limb
 
 	daddiu	$7,$7,-1	# one limb is always left for .Lend
 	and	$9,$7,4-1	# number of limbs in first loop
 	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
 	move   $2,$0		# delay slot: clear the running borrow
 
 	dsubu	$7,$7,$9	# limbs left for the 4-limb loop
 
 /* First loop: one limb per pass, $9 passes.  The next pair of limbs is
    loaded into $12/$13 while the current pair is being subtracted.  */
 .Loop0: daddiu	$9,$9,-1
 	ld	$12,8($5)
 	daddu	$11,$11,$2	# s2 limb + borrow in
 	ld	$13,8($6)
 	sltu	$8,$11,$2	# $8 = carry out of adding the borrow
 	dsubu	$11,$10,$11	# s1 limb - (s2 limb + borrow)
 	sltu	$2,$10,$11	# $2 = borrow out of the subtraction
 	sd	$11,0($4)
 	or	$2,$2,$8	# combined borrow for the next limb
 
 	daddiu	$5,$5,8
 	daddiu	$6,$6,8
 	move	$10,$12
 	move	$11,$13
 	bne	$9,$0,.Loop0
 	 daddiu $4,$4,8
 
 .L0:	beq	$7,$0,.Lend
 	 nop
 
 /* Main loop: four limbs per pass, alternating between the register
    pairs $10/$11 and $12/$13 for the current and the preloaded limbs.  */
 .Loop:	daddiu	$7,$7,-4
 
 	ld	$12,8($5)
 	daddu	$11,$11,$2
 	ld	$13,8($6)
 	sltu	$8,$11,$2
 	dsubu	$11,$10,$11
 	sltu	$2,$10,$11
 	sd	$11,0($4)
 	or	$2,$2,$8
 
 	ld	$10,16($5)
 	daddu	$13,$13,$2
 	ld	$11,16($6)
 	sltu	$8,$13,$2
 	dsubu	$13,$12,$13
 	sltu	$2,$12,$13
 	sd	$13,8($4)
 	or	$2,$2,$8
 
 	ld	$12,24($5)
 	daddu	$11,$11,$2
 	ld	$13,24($6)
 	sltu	$8,$11,$2
 	dsubu	$11,$10,$11
 	sltu	$2,$10,$11
 	sd	$11,16($4)
 	or	$2,$2,$8
 
 	ld	$10,32($5)
 	daddu	$13,$13,$2
 	ld	$11,32($6)
 	sltu	$8,$13,$2
 	dsubu	$13,$12,$13
 	sltu	$2,$12,$13
 	sd	$13,24($4)
 	or	$2,$2,$8
 
 	daddiu	$5,$5,32
 	daddiu	$6,$6,32
 
 	bne	$7,$0,.Loop
 	 daddiu $4,$4,32
 
 /* Last limb: already loaded in $10/$11.  The final borrow is formed in
    the delay slot of the return jump.  */
 .Lend:	daddu	$11,$11,$2
 	sltu	$8,$11,$2
 	dsubu	$11,$10,$11
 	sltu	$2,$10,$11
 	sd	$11,0($4)
 	j	$31
 	or	$2,$2,$8
 
 	.end	_gcry_mpih_sub_n
 
diff --git a/mpi/mpi-add.c b/mpi/mpi-add.c
index 2fd19e55..41dc3900 100644
--- a/mpi/mpi-add.c
+++ b/mpi/mpi-add.c
@@ -1,239 +1,239 @@
 /* mpi-add.c  -  MPI functions
  * Copyright (C) 1994, 1996, 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 #include "mpi-internal.h"
 
 
 /****************
  * Add the unsigned integer V to the MPI U and store the result in W.
  * U and W may be the same object: the size and sign of U are read
  * before W is resized, and the limb pointers are fetched afterwards.
  *
  * NOTE(review): for a negative one-limb U whose magnitude equals V the
  * subtract path leaves nlimbs == 0 with sign == 1; confirm callers
  * tolerate a negatively signed zero.
  */
 void
 _gcry_mpi_add_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 {
   const mpi_size_t n_u = u->nlimbs;
   const int u_negative = u->sign;
   mpi_size_t n_w = n_u + 1;	/* room for U plus a possible carry limb */
   int w_negative = 0;
   mpi_ptr_t src, dst;
 
   if (w->alloced < n_w)
     mpi_resize (w, n_w);
 
   /* Fetch the limb arrays only now; the resize may have moved them.  */
   src = u->d;
   dst = w->d;
 
   if (n_u == 0)
     {
       /* U has no limbs: the result is just V.  */
       dst[0] = v;
       n_w = v ? 1 : 0;
     }
   else if (u_negative == 0)
     {
       /* U is not negative: plain addition with carry out.  */
       mpi_limb_t carry = _gcry_mpih_add_1 (dst, src, n_u, v);
 
       dst[n_u] = carry;
       n_w = n_u + carry;
     }
   else if (n_u == 1 && src[0] < v)
     {
       /* U is negative and smaller in magnitude than V: V - |U|, and the
          result is not negative.  */
       dst[0] = v - src[0];
       n_w = 1;
     }
   else
     {
       /* U is negative and at least as large in magnitude as V:
          |U| - V, result stays negative.  The size can shrink by at
          most one limb.  */
       _gcry_mpih_sub_1 (dst, src, n_u, v);
       n_w = n_u - (dst[n_u - 1] == 0);
       w_negative = 1;
     }
 
   w->nlimbs = n_w;
   w->sign = w_negative;
 }
 
 
 /* Common worker for addition and subtraction: stores U + V in W, where
    the sign of V is first XORed with INV_V_SIGN (the callers in this file
    pass 0 to add and 1 to subtract).  The operand with more limbs is
    treated as the "longer" one so that the limb helpers always get the
    larger size first.  U or V may be the same object as W.  */
 static void
 _gcry_mpi_add_inv_sign(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, int inv_v_sign)
 {
   gcry_mpi_t longer, shorter;
   mpi_ptr_t resp, longp, shortp;
   mpi_size_t lsize, ssize, rsize;
   int lsign, ssign;
   int rsign = 0;
 
   /* Pick the operand roles; only V has its sign adjusted.  */
   if (u->nlimbs < v->nlimbs)
     {
       longer = v;
       lsign = v->sign ^ inv_v_sign;
       shorter = u;
       ssign = u->sign;
     }
   else
     {
       longer = u;
       lsign = u->sign;
       shorter = v;
       ssign = v->sign ^ inv_v_sign;
     }
   lsize = longer->nlimbs;
   ssize = shorter->nlimbs;
 
   rsize = lsize + 1;
   RESIZE_IF_NEEDED (w, rsize);
 
   /* The limb pointers must be read after the resize, since U or V may
      be the same object as W.  */
   longp = longer->d;
   shortp = shorter->d;
   resp = w->d;
 
   if (ssize == 0)
     {
       /* Nothing to add or subtract: copy the longer operand.  */
       MPN_COPY (resp, longp, lsize );
       rsize = lsize;
       rsign = lsign;
     }
   else if (lsign == ssign)
     {
       /* Same sign: add the magnitudes and keep the sign.  */
       mpi_limb_t carry = _gcry_mpih_add (resp, longp, lsize, shortp, ssize);
 
       resp[lsize] = carry;
       rsize = lsize + carry;
       rsign = (lsign != 0);
     }
   else
     {
       /* Different signs: subtract the smaller magnitude from the larger
          one.  Comparing sizes is sufficient when they differ, because
          LSIZE >= SSIZE here.  */
       if (lsize != ssize)
         {
           _gcry_mpih_sub (resp, longp, lsize, shortp, ssize);
           rsign = lsign;
         }
       else if (_gcry_mpih_cmp (longp, shortp, lsize) < 0)
         {
           /* The "shorter" operand is larger in magnitude: the result
              takes the opposite of the longer operand's sign.  */
           _gcry_mpih_sub_n (resp, shortp, longp, lsize);
           rsign = !lsign;
         }
       else
         {
           _gcry_mpih_sub_n (resp, longp, shortp, lsize);
           rsign = (lsign != 0);
         }
       rsize = lsize;
       MPN_NORMALIZE (resp, rsize);
     }
 
   w->nlimbs = rsize;
   w->sign = rsign;
 }
 
 
 /****************
  * Subtract the unsigned integer V from the MPI U and store the result
  * in W.  U and W may be the same object: the size and sign of U are read
  * before W is resized, and the limb pointers are fetched afterwards.
  *
  * NOTE(review): when U has no limbs the result is flagged negative even
  * for V == 0 (nlimbs == 0, sign == 1); confirm callers tolerate a
  * negatively signed zero.
  */
 void
 _gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 {
   const mpi_size_t n_u = u->nlimbs;
   const int u_negative = u->sign;
   mpi_size_t n_w = n_u + 1;	/* room for U plus a possible carry limb */
   int w_negative = 0;
   mpi_ptr_t src, dst;
 
   if (w->alloced < n_w)
     mpi_resize (w, n_w);
 
   /* Fetch the limb arrays only now; the resize may have moved them.  */
   src = u->d;
   dst = w->d;
 
   if (n_u == 0)
     {
       /* U has no limbs: the result is V with a negative sign.  */
       dst[0] = v;
       n_w = v ? 1 : 0;
       w_negative = 1;
     }
   else if (u_negative)
     {
       /* U is negative: the magnitudes add up and the result stays
          negative.  */
       mpi_limb_t carry = _gcry_mpih_add_1 (dst, src, n_u, v);
 
       dst[n_u] = carry;
       n_w = n_u + carry;
       w_negative = 1;
     }
   else if (n_u == 1 && src[0] < v)
     {
       /* U is not negative but smaller than V: V - U, negative.  */
       dst[0] = v - src[0];
       n_w = 1;
       w_negative = 1;
     }
   else
     {
       /* U is not negative and at least V: U - V.  The size can shrink
          by at most one limb.  */
       _gcry_mpih_sub_1 (dst, src, n_u, v);
       n_w = n_u - (dst[n_u - 1] == 0);
     }
 
   w->nlimbs = n_w;
   w->sign = w_negative;
 }
 
 /* Store U + V in W.  Delegates to _gcry_mpi_add_inv_sign with the sign
    of V left unchanged.  */
 void
 _gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
   _gcry_mpi_add_inv_sign (w, u, v, 0);
 }
 
 /* Store U - V in W.  Delegates to _gcry_mpi_add_inv_sign with the sign
    of V inverted, so the subtraction is carried out as U + (-V).  */
 void
 _gcry_mpi_sub(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
   _gcry_mpi_add_inv_sign (w, u, v, 1);
 }
 
 
 /* Modular addition: store U + V in W, then reduce W modulo M in place.  */
 void
 _gcry_mpi_addm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
   mpi_add (w, u, v);
   mpi_mod (w, w, m);
 }
 
 /* Modular subtraction: store U - V in W, then reduce W modulo M in
    place.  */
 void
 _gcry_mpi_subm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
   mpi_sub (w, u, v);
   mpi_mod (w, w, m);
 }
diff --git a/mpi/mpi-bit.c b/mpi/mpi-bit.c
index 7313a9d4..9cd40818 100644
--- a/mpi/mpi-bit.c
+++ b/mpi/mpi-bit.c
@@ -1,375 +1,375 @@
 /* mpi-bit.c  -  MPI bit level functions
  * Copyright (C) 1998, 1999, 2001, 2002, 2006 Free Software Foundation, Inc.
  * Copyright (C) 2013  g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 
 /* Lookup table with 256 entries: entry I holds the number of bits
  * needed to represent I (0 for 0, 1 for 1, 2 for 2..3, ... 8 for
  * 128..255).  It is only compiled in when MPI_INTERNAL_NEED_CLZ_TAB is
  * defined; presumably that is requested by a table-driven
  * count_leading_zeros implementation in longlong.h -- confirm there.  */
 #ifdef MPI_INTERNAL_NEED_CLZ_TAB
 #ifdef __STDC__
 const
 #endif
 unsigned char
 _gcry_clz_tab[] =
 {
   0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
 };
 #endif
 
 
 #define A_LIMB_1 ((mpi_limb_t)1)
 
 
 /****************
  * Strip leading zero limbs: lower A->nlimbs until the most significant
  * limb is non-zero or no limbs are left.  Opaque MPIs are not touched.
  */
 void
 _gcry_mpi_normalize( gcry_mpi_t a )
 {
     if( mpi_is_opaque(a) )
 	return;
 
     while( a->nlimbs && !a->d[a->nlimbs-1] )
 	a->nlimbs--;
 }
 
 
 
 /****************
  * Return the number of significant bits in A.  For an opaque MPI the
  * SIGN field is returned, which holds the bit count in that case.
  * Note that A gets normalized as a side effect.
  */
 unsigned int
 _gcry_mpi_get_nbits (gcry_mpi_t a)
 {
     mpi_limb_t top;
     unsigned nzeros;
 
     if( mpi_is_opaque(a) )
 	return a->sign; /* which holds the number of bits */
 
     _gcry_mpi_normalize( a );
     if( !a->nlimbs )
 	return 0;
 
     /* Bits in the top limb plus all bits of the limbs below it.  */
     top = a->d[a->nlimbs-1];
     if( top )
 	count_leading_zeros( nzeros, top );
     else
 	nzeros = BITS_PER_MPI_LIMB;
     nzeros = BITS_PER_MPI_LIMB - nzeros + (a->nlimbs-1) * BITS_PER_MPI_LIMB;
     return nzeros;
 }
 
 
 /****************
  * Return 1 if bit N of A is set, 0 otherwise.  Bits beyond the limbs
  * currently in use count as cleared.
  */
 int
 _gcry_mpi_test_bit( gcry_mpi_t a, unsigned int n )
 {
     unsigned int idx = n / BITS_PER_MPI_LIMB;
     unsigned int shift = n % BITS_PER_MPI_LIMB;
 
     if( idx >= a->nlimbs )
 	return 0; /* left of the most significant limb */
 
     if( a->d[idx] & (A_LIMB_1 << shift) )
 	return 1;
     return 0;
 }
 
 
 /****************
  * Set bit N of A, extending A when the bit lies beyond the limbs
  * currently in use.  Immutable MPIs are reported and left unchanged.
  */
 void
 _gcry_mpi_set_bit( gcry_mpi_t a, unsigned int n )
 {
   unsigned int limb_idx, bit_idx, k;
 
   if (mpi_is_immutable (a))
     {
       mpi_immutable_failed ();
       return;
     }
 
   limb_idx = n / BITS_PER_MPI_LIMB;
   bit_idx  = n % BITS_PER_MPI_LIMB;
 
   if ( limb_idx >= a->nlimbs )
     {
       /* Wipe the allocated but unused limbs before extending A.  */
       k = a->nlimbs;
       while (k < a->alloced)
         a->d[k++] = 0;
       mpi_resize (a, limb_idx+1 );
       a->nlimbs = limb_idx+1;
     }
   a->d[limb_idx] |= (A_LIMB_1<<bit_idx);
 }
 
 /****************
  * Set bit N of A and clear every bit above it, so that N becomes the
  * most significant bit.  Immutable MPIs are reported and left unchanged.
  */
 void
 _gcry_mpi_set_highbit( gcry_mpi_t a, unsigned int n )
 {
   unsigned int limb_idx, bit_idx, k;
 
   if (mpi_is_immutable (a))
     {
       mpi_immutable_failed ();
       return;
     }
 
   limb_idx = n / BITS_PER_MPI_LIMB;
   bit_idx  = n % BITS_PER_MPI_LIMB;
 
   if ( limb_idx >= a->nlimbs )
     {
       /* Wipe the allocated but unused limbs before extending A.  */
       k = a->nlimbs;
       while (k < a->alloced)
         a->d[k++] = 0;
       mpi_resize (a, limb_idx+1 );
       a->nlimbs = limb_idx+1;
     }
 
   a->d[limb_idx] |= (A_LIMB_1<<bit_idx);
 
   /* Drop the bits above N inside the top limb; the limbs above it are
      cut off by adjusting NLIMBS.  */
   for (k = bit_idx + 1; k < BITS_PER_MPI_LIMB; k++)
     a->d[limb_idx] &= ~(A_LIMB_1 << k);
   a->nlimbs = limb_idx+1;
 }
 
 /****************
  * Clear bit N of A and all bits above it.  Immutable MPIs are reported
  * and left unchanged.
  */
 void
 _gcry_mpi_clear_highbit( gcry_mpi_t a, unsigned int n )
 {
   unsigned int limb_idx, k;
 
   if (mpi_is_immutable (a))
     {
       mpi_immutable_failed ();
       return;
     }
 
   limb_idx = n / BITS_PER_MPI_LIMB;
 
   /* Bits beyond the limbs in use are already zero.  */
   if( limb_idx >= a->nlimbs )
     return;
 
   k = n % BITS_PER_MPI_LIMB;
   while( k < BITS_PER_MPI_LIMB )
     {
       a->d[limb_idx] &= ~(A_LIMB_1 << k);
       k++;
     }
   /* Everything above the limb holding bit N is cut off.  */
   a->nlimbs = limb_idx+1;
 }
 
 /****************
  * Clear bit N of A.  Immutable MPIs are reported and left unchanged;
  * bits beyond the limbs in use need no action.
  */
 void
 _gcry_mpi_clear_bit( gcry_mpi_t a, unsigned int n )
 {
   unsigned int limb_idx, bit_idx;
 
   if (mpi_is_immutable (a))
     {
       mpi_immutable_failed ();
       return;
     }
 
   limb_idx = n / BITS_PER_MPI_LIMB;
   bit_idx  = n % BITS_PER_MPI_LIMB;
 
   if (limb_idx < a->nlimbs)
     a->d[limb_idx] &= ~(A_LIMB_1 << bit_idx);
 }
 
 
 /****************
  * Shift A by COUNT limbs to the right
  * This is used only within the MPI library
  *
  * Immutable MPIs are reported and left unchanged.  A COUNT of zero is
  * a no-op; a COUNT of at least A->nlimbs yields zero.
  */
 void
 _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count )
 {
   mpi_ptr_t ap = a->d;
   mpi_size_t n = a->nlimbs;
   unsigned int i;
 
   if (mpi_is_immutable (a))
     {
       mpi_immutable_failed ();
       return;
     }
 
   /* Nothing to move.  Returning here also avoids the store to ap[n]
      below, which may lie outside the allocation when all allocated
      limbs are in use.  */
   if (!count)
     return;
 
   if (count >= n)
     {
       a->nlimbs = 0;
       return;
     }
 
   for( i = 0; i < n - count; i++ )
     ap[i] = ap[i+count];
   /* Clear the first vacated limb (index n - count, which is < n).  */
   ap[i] = 0;
   a->nlimbs -= count;
 }
 
 
 /*
  * Shift A by N bits to the right and store the result in X.  X may be
  * the same object as A (in-place shift).  Immutable X is reported and
  * left unchanged.  The result is normalized before returning.
  */
 void
 _gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
 {
   unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);  /* Whole limbs to drop.  */
   unsigned int nbits = (n%BITS_PER_MPI_LIMB);   /* Remaining bit shift.  */
   unsigned int i;
   mpi_size_t alimbs;
   mpi_ptr_t xp, ap;
 
   if (mpi_is_immutable (x))
     {
       mpi_immutable_failed ();
       return;
     }
 
   alimbs = a->nlimbs;
 
   /* For a distinct destination, size it like A and copy the meta data;
      the limbs themselves are transferred further below.  */
   if (x != a)
     {
       RESIZE_IF_NEEDED (x, alimbs);
       x->nlimbs = alimbs;
       x->flags = a->flags;
       x->sign = a->sign;
     }
 
   /* Shifting out every limb in use leaves zero.  */
   if (nlimbs >= alimbs)
     {
       x->nlimbs = 0;
       return;
     }
 
   /* Read the pointers only after the possible resize above.  */
   xp = x->d;
   ap = a->d;
 
   if (alimbs && nbits)
     {
       /* Bit shift combined with skipping the NLIMBS low limbs of A.  */
       _gcry_mpih_rshift (xp, ap + nlimbs, alimbs - nlimbs, nbits);
       if (nlimbs)
 	xp[alimbs - nlimbs] = 0;
       x->nlimbs -= nlimbs;
     }
   else if (nlimbs || (x != a))
     {
       /* Pure limb move (or plain copy when N is zero and X != A).  */
       for (i = 0; i < alimbs - nlimbs; i++ )
 	xp[i] = ap[i + nlimbs];
       if (nlimbs)
 	xp[i] = 0;
       x->nlimbs -= nlimbs;
     }
 
   MPN_NORMALIZE (x->d, x->nlimbs);
 }
 
 
 /****************
  * Shift A by COUNT limbs to the left, i.e. insert COUNT zero limbs at
  * the least significant end.  Nothing is done when COUNT is zero or A
  * has no limbs.  This is used only within the MPI library.
  */
 void
 _gcry_mpi_lshift_limbs (gcry_mpi_t a, unsigned int count)
 {
   mpi_ptr_t limbs;
   int used = a->nlimbs;
   int k;
 
   if (!count || !used)
     return;
 
   RESIZE_IF_NEEDED (a, used+count);
 
   /* Fetch the pointer after the resize.  Move from the top down so
      that no source limb is overwritten before it has been copied.  */
   limbs = a->d;
   k = used - 1;
   while (k >= 0)
     {
       limbs[k+count] = limbs[k];
       k--;
     }
   for (k = 0; k < count; k++)
     limbs[k] = 0;
   a->nlimbs += count;
 }
 
 
 /*
  * Shift A by N bits to the left and store the result in X.  X may be
  * the same object as A (in-place shift).  Immutable X is reported and
  * left unchanged.  Flags and sign are copied from A and the result is
  * normalized before returning.
  */
 void
 _gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
 {
   unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);  /* Whole limbs to insert.  */
   unsigned int nbits = (n%BITS_PER_MPI_LIMB);   /* Remaining bit shift.  */
   mpi_size_t alimbs;
   mpi_ptr_t xp, ap;
   int i;
 
   if (mpi_is_immutable (x))
     {
       mpi_immutable_failed ();
       return;
     }
 
   if (x == a && !n)
     return;  /* In-place shift with an amount of zero.  */
 
   /* Note: might be in-place operation, so a==x or a!=x. */
 
   alimbs = a->nlimbs;
 
   /* One extra limb for bits shifted out of the top limb of A.  */
   RESIZE_IF_NEEDED (x, alimbs + nlimbs + 1);
   /* Read the pointers only after the possible resize above.  */
   xp = x->d;
   ap = a->d;
   if (nbits && alimbs)
     {
       /* Bit shift into place; the limb returned by the helper becomes
          the new top limb.  */
       x->nlimbs = alimbs + nlimbs + 1;
       xp[alimbs + nlimbs] = _gcry_mpih_lshift (xp + nlimbs, ap, alimbs, nbits);
     }
   else
     {
       /* Pure limb move, done from the top down so an in-place shift
          does not overwrite limbs that still have to be copied.  */
       x->nlimbs = alimbs + nlimbs;
       for (i = alimbs - 1; i >= 0; i--)
 	xp[i + nlimbs] = ap[i];
     }
   /* Zero the limbs inserted at the low end.  */
   for (i = 0; i < nlimbs; i++)
     xp[i] = 0;
   x->flags = a->flags;
   x->sign = a->sign;
   MPN_NORMALIZE (x->d, x->nlimbs);
 }
diff --git a/mpi/mpi-cmp.c b/mpi/mpi-cmp.c
index 8927fa0e..bf2c338f 100644
--- a/mpi/mpi-cmp.c
+++ b/mpi/mpi-cmp.c
@@ -1,130 +1,130 @@
 /* mpi-cmp.c  -  MPI functions
  * Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 
 /* Compare the MPI U with the unsigned integer V.  Returns a negative
  * value if U < V, zero if they are equal and a positive value if
  * U > V.  Note that U gets normalized as a side effect.  */
 int
 _gcry_mpi_cmp_ui (gcry_mpi_t u, unsigned long v)
 {
   mpi_limb_t limb = v;
 
   _gcry_mpi_normalize (u);
 
   /* U has no limbs, i.e. it is zero.  */
   if (!u->nlimbs)
     return limb ? -1 : 0;
 
   /* A negative U is smaller than any unsigned value.  */
   if (u->sign)
     return -1;
 
   /* More than one limb cannot be matched by a single limb value.  */
   if (u->nlimbs != 1)
     return 1;
 
   /* Exactly one limb: compare it directly.  */
   if (u->d[0] == limb)
     return 0;
   return (u->d[0] > limb) ? 1 : -1;
 }
 
 
 /* Helper for _gcry_mpi_cmp and _gcry_mpi_cmpabs.
  *
  * Compare U with V and return a negative value if U < V, zero if they
  * are equal and a positive value if U > V.  With ABSMODE set only the
  * magnitudes are compared.  Opaque MPIs sort before regular ones; two
  * opaque MPIs are ordered by their bit length (kept in the SIGN field)
  * and then by their content.  Regular operands are normalized as a
  * side effect.  */
 static int
 do_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v, int absmode)
 {
   mpi_size_t usize;
   mpi_size_t vsize;
   int usign;
   int vsign;
   int cmp;
 
   if (mpi_is_opaque (u) || mpi_is_opaque (v))
     {
       /* We have no sign and thus ABSMODE has no effect here.  */
       if (mpi_is_opaque (u) && !mpi_is_opaque (v))
         return -1;
       if (!mpi_is_opaque (u) && mpi_is_opaque (v))
         return 1;
       if (!u->sign && !v->sign)
         return 0; /* Empty buffers are identical.  */
       if (u->sign < v->sign)
         return -1;
       if (u->sign > v->sign)
         return 1;
       return memcmp (u->d, v->d, (u->sign+7)/8);
     }
   else
     {
       _gcry_mpi_normalize (u);
       _gcry_mpi_normalize (v);
 
       usize = u->nlimbs;
       vsize = v->nlimbs;
       usign = absmode? 0 : u->sign;
       vsign = absmode? 0 : v->sign;
 
       /* Special treatment for +0 == -0 */
       if (!usize && !vsize)
         return 0;
 
       /* Compare sign bits.  */
       if (!usign && vsign)
         return 1;
       if (usign && !vsign)
         return -1;
 
       /* U and V are either both positive or both negative.  */
 
       if (usize != vsize && !usign && !vsign)
         return usize - vsize;
       /* Both negative: the operand with more limbs has the larger
          magnitude and is therefore the smaller number.  */
       if (usize != vsize && usign && vsign)
         return vsize - usize;
       if (!usize )
         return 0;
       if (!(cmp = _gcry_mpih_cmp (u->d, v->d, usize)))
         return 0;
       /* CMP orders the magnitudes; the order is reversed for negative
          operands.  */
       if ((cmp < 0?1:0) == (usign?1:0))
         return 1;
     }
   return -1;
 }
 
 
 /* Signed comparison of U and V; see do_mpi_cmp for the return value.  */
 int
 _gcry_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v)
 {
   const int absmode = 0;
 
   return do_mpi_cmp (u, v, absmode);
 }
 
 /* Compare U and V by magnitude only, ignoring their signs; see
  * do_mpi_cmp for the return value.  */
 int
 _gcry_mpi_cmpabs (gcry_mpi_t u, gcry_mpi_t v)
 {
   const int absmode = 1;
 
   return do_mpi_cmp (u, v, absmode);
 }
diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c
index 166ab875..8d2a8573 100644
--- a/mpi/mpi-div.c
+++ b/mpi/mpi-div.c
@@ -1,360 +1,360 @@
 /* mpi-div.c  -  MPI functions
  * Copyright (C) 1994, 1996, 1998, 2001, 2002,
  *               2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 #include "g10lib.h"
 
 
 /* Compute the remainder REM of DIVIDEND / DIVISOR.  A truncating
  * division is done first; if the operands have different signs and the
  * remainder is non-zero, DIVISOR is added to it.  REM may be the same
  * object as DIVISOR.  */
 void
 _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor )
 {
     gcry_mpi_t divisor_copy = NULL;
     int divisor_negative = divisor->sign? 1:0;
 
     /* The divisor is still needed after REM has been written, so work
      * on a private copy when both are the same object.  */
     if( rem == divisor ) {
 	divisor_copy = mpi_copy( divisor );
 	divisor = divisor_copy;
     }
 
     _gcry_mpi_tdiv_r( rem, dividend, divisor );
 
     if( divisor_negative != (dividend->sign? 1:0) && rem->nlimbs )
 	mpi_add (rem, rem, divisor);
 
     if( divisor_copy )
 	mpi_free(divisor_copy);
 }
 
 
 
 /****************
  * Division rounding the quotient towards -infinity.
  * The remainder gets the same sign as the denominator, i.e. it is
  * never negative because DIVISOR is unsigned.
  * rem is optional; the remainder is always returned as function value.
  */
 
 unsigned long
 _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend,
                      unsigned long divisor )
 {
     mpi_limb_t rlimb;
 
     rlimb = _gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor );
     /* The helper works on the magnitude; for a negative dividend the
      * floor remainder is DIVISOR minus that value.  */
     if( rlimb && dividend->sign )
 	rlimb = divisor - rlimb;
 
     if( rem ) {
 	/* Make sure there is a limb to store into.  */
 	RESIZE_IF_NEEDED( rem, 1 );
 	rem->d[0] = rlimb;
 	rem->nlimbs = rlimb? 1:0;
 	/* Do not leave a stale sign from a previous value behind.  */
 	rem->sign = 0;
     }
     return rlimb;
 }
 
 
 /* Compute only the quotient QUOT of DIVIDEND / DIVISOR by running
  * _gcry_mpi_fdiv_qr with a scratch MPI for the remainder.  */
 void
 _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor )
 {
     gcry_mpi_t scratch_rem;
 
     scratch_rem = mpi_alloc( mpi_get_nlimbs(quot) );
     _gcry_mpi_fdiv_qr( quot, scratch_rem, dividend, divisor);
     mpi_free(scratch_rem);
 }
 
 /* Compute quotient QUOT and remainder REM of DIVIDEND / DIVISOR.  A
  * truncating division is done first; if the operands have different
  * signs and the remainder is non-zero, the quotient is decremented and
  * DIVISOR is added to the remainder.  QUOT or REM may be the same
  * object as DIVIDEND or DIVISOR.  */
 void
 _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor )
 {
     /* Record both signs (normalized to 0/1) before dividing: the
      * division stores new signs into QUOT and REM, which may be the
      * very objects passed as DIVIDEND or DIVISOR.  */
     int divisor_sign = divisor->sign? 1:0;
     int dividend_sign = dividend->sign? 1:0;
     gcry_mpi_t temp_divisor = NULL;
 
     /* The divisor is needed again for the correction step below.  */
     if( quot == divisor || rem == divisor ) {
 	temp_divisor = mpi_copy( divisor );
 	divisor = temp_divisor;
     }
 
     _gcry_mpi_tdiv_qr( quot, rem, dividend, divisor );
 
     if( (divisor_sign ^ dividend_sign) && rem->nlimbs ) {
 	mpi_sub_ui( quot, quot, 1 );
 	mpi_add( rem, rem, divisor);
     }
 
     if( temp_divisor )
 	mpi_free(temp_divisor);
 }
 
 
 /* If den == quot, den needs temporary storage.
  * If den == rem, den needs temporary storage.
  * If num == quot, num needs temporary storage.
  * If den has temporary storage, it can be normalized while being copied,
  *   i.e no extra storage should be allocated.
  */
 
 /* Compute only the remainder REM of NUM / DEN by calling
  * _gcry_mpi_tdiv_qr without a quotient object.  */
 void
 _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den)
 {
     gcry_mpi_t no_quotient = NULL;
 
     _gcry_mpi_tdiv_qr(no_quotient, rem, num, den );
 }
 
 /* Divide NUM by DEN, storing the quotient in QUOT and the remainder in
  * REM.  QUOT may be NULL when only the remainder is wanted.  The
  * remainder takes the sign of NUM, the quotient the XOR of both signs.
  * A DEN without limbs triggers _gcry_divide_by_zero.  Temporary limb
  * buffers are recorded in MARKER and released at the end of the
  * function; the early returns happen before any of them is allocated.  */
 void
 _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den)
 {
     mpi_ptr_t np, dp;
     mpi_ptr_t qp, rp;
     mpi_size_t nsize = num->nlimbs;
     mpi_size_t dsize = den->nlimbs;
     mpi_size_t qsize, rsize;
     /* The signs are captured now because QUOT or REM may be the same
      * object as NUM or DEN.  */
     mpi_size_t sign_remainder = num->sign;
     mpi_size_t sign_quotient = num->sign ^ den->sign;
     unsigned normalization_steps;
     mpi_limb_t q_limb;
     mpi_ptr_t marker[5];              /* Temporary limb buffers ...  */
     unsigned int marker_nlimbs[5];    /* ... and their sizes in limbs.  */
     int markidx=0;
 
     /* Ensure space is enough for quotient and remainder.
      * We need space for an extra limb in the remainder, because it's
      * up-shifted (normalized) below.  */
     rsize = nsize + 1;
     mpi_resize( rem, rsize);
 
     qsize = rsize - dsize;	  /* qsize cannot be bigger than this.	*/
     /* DEN has more limbs than NUM: the quotient is zero and the
      * remainder is NUM itself.  */
     if( qsize <= 0 ) {
 	if( num != rem ) {
 	    rem->nlimbs = num->nlimbs;
 	    rem->sign = num->sign;
 	    MPN_COPY(rem->d, num->d, nsize);
 	}
 	if( quot ) {
 	    /* This needs to follow the assignment to rem, in case the
 	     * numerator and quotient are the same.  */
 	    quot->nlimbs = 0;
 	    quot->sign = 0;
 	}
 	return;
     }
 
     if( quot )
 	mpi_resize( quot, qsize);
 
     if (!dsize)
       _gcry_divide_by_zero();
 
     /* Read pointers here, when reallocation is finished.  */
     np = num->d;
     dp = den->d;
     rp = rem->d;
 
     /* Optimize division by a single-limb divisor.  */
     if( dsize == 1 ) {
 	mpi_limb_t rlimb;
 	if( quot ) {
 	    qp = quot->d;
 	    rlimb = _gcry_mpih_divmod_1( qp, np, nsize, dp[0] );
 	    /* Drop the top quotient limb if it turned out to be zero.  */
 	    qsize -= qp[qsize - 1] == 0;
 	    quot->nlimbs = qsize;
 	    quot->sign = sign_quotient;
 	}
 	else
 	    rlimb = _gcry_mpih_mod_1( np, nsize, dp[0] );
 	rp[0] = rlimb;
 	rsize = rlimb != 0?1:0;
 	rem->nlimbs = rsize;
 	rem->sign = sign_remainder;
 	return;
     }
 
 
     if( quot ) {
 	qp = quot->d;
 	/* Make sure QP and NP point to different objects.  Otherwise the
 	 * numerator would be gradually overwritten by the quotient limbs.  */
 	if(qp == np) { /* Copy NP object to temporary space.  */
             marker_nlimbs[markidx] = nsize;
 	    np = marker[markidx++] = mpi_alloc_limb_space(nsize,
 							  mpi_is_secure(quot));
 	    MPN_COPY(np, qp, nsize);
 	}
     }
     else /* Put quotient at top of remainder. */
 	qp = rp + dsize;
 
     count_leading_zeros( normalization_steps, dp[dsize - 1] );
 
     /* Normalize the denominator, i.e. make its most significant bit set by
      * shifting it NORMALIZATION_STEPS bits to the left.  Also shift the
      * numerator the same number of steps (to keep the quotient the same!).
      */
     if( normalization_steps ) {
 	mpi_ptr_t tp;
 	mpi_limb_t nlimb;
 
 	/* Shift up the denominator setting the most significant bit of
 	 * the most significant word.  Use temporary storage not to clobber
 	 * the original contents of the denominator.  */
         marker_nlimbs[markidx] = dsize;
 	tp = marker[markidx++] = mpi_alloc_limb_space(dsize,mpi_is_secure(den));
 	_gcry_mpih_lshift( tp, dp, dsize, normalization_steps );
 	dp = tp;
 
 	/* Shift up the numerator, possibly introducing a new most
 	 * significant word.  Move the shifted numerator in the remainder
 	 * meanwhile.  */
 	nlimb = _gcry_mpih_lshift(rp, np, nsize, normalization_steps);
 	if( nlimb ) {
 	    rp[nsize] = nlimb;
 	    rsize = nsize + 1;
 	}
 	else
 	    rsize = nsize;
     }
     else {
 	/* The denominator is already normalized, as required.	Copy it to
 	 * temporary space if it overlaps with the quotient or remainder.  */
 	if( dp == rp || (quot && (dp == qp))) {
 	    mpi_ptr_t tp;
 
             marker_nlimbs[markidx] = dsize;
 	    tp = marker[markidx++] = mpi_alloc_limb_space(dsize,
                                                           mpi_is_secure(den));
 	    MPN_COPY( tp, dp, dsize );
 	    dp = tp;
 	}
 
 	/* Move the numerator to the remainder.  */
 	if( rp != np )
 	    MPN_COPY(rp, np, nsize);
 
 	rsize = nsize;
     }
 
     /* The numerator now lives in RP; the helper stores the quotient
      * limbs at QP and its return value is a further, most significant
      * quotient limb, which is appended below when non-zero.  */
     q_limb = _gcry_mpih_divrem( qp, 0, rp, rsize, dp, dsize );
 
     if( quot ) {
 	qsize = rsize - dsize;
 	if(q_limb) {
 	    qp[qsize] = q_limb;
 	    qsize += 1;
 	}
 
 	quot->nlimbs = qsize;
 	quot->sign = sign_quotient;
     }
 
     /* The remainder occupies at most DSIZE limbs.  */
     rsize = dsize;
     MPN_NORMALIZE (rp, rsize);
 
     /* Undo the normalization shift that was applied to the numerator.  */
     if( normalization_steps && rsize ) {
 	_gcry_mpih_rshift(rp, rp, rsize, normalization_steps);
 	rsize -= rp[rsize - 1] == 0?1:0;
     }
 
     rem->nlimbs = rsize;
     rem->sign	= sign_remainder;
     /* Release the temporary limb buffers in reverse order.  */
     while( markidx )
       {
         markidx--;
 	_gcry_mpi_free_limb_space (marker[markidx], marker_nlimbs[markidx]);
       }
 }
 
 /* Store in W the limbs of U shifted right by COUNT bits, i.e. the
  * magnitude of U divided by 2^COUNT.  Only W's limbs and limb count
  * are written; its sign field is left untouched.  */
 void
 _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned int count )
 {
     mpi_size_t usize = u->nlimbs;
     mpi_size_t skip = count / BITS_PER_MPI_LIMB;   /* Whole limbs dropped. */
     mpi_size_t wsize = usize - skip;
     mpi_ptr_t src, dst;
 
     /* Everything is shifted out.  */
     if( skip >= usize ) {
 	w->nlimbs = 0;
 	return;
     }
 
     RESIZE_IF_NEEDED( w, wsize );
     dst = w->d;
     src = u->d;
 
     count %= BITS_PER_MPI_LIMB;
     if( !count ) {
 	MPN_COPY_INCR( dst, src + skip, wsize);
     }
     else {
 	_gcry_mpih_rshift( dst, src + skip, wsize, count );
 	/* The top limb may have become zero.  */
 	if( !dst[wsize - 1] )
 	    wsize -= 1;
     }
 
     w->nlimbs = wsize;
 }
 
 /****************
  * Return true if DIVIDEND is divisible by DIVISOR, i.e. if the
  * single-limb remainder is zero.
  * (note: divisor must fit into a limb)
  */
 int
 _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor )
 {
     mpi_limb_t remainder;
 
     remainder = _gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor );
     return !remainder;
 }
 
 
 /* Generic division front end.  ROUND selects the variant: 0 uses the
  * tdiv functions, a negative value the fdiv functions; a positive
  * value is not implemented and reported through log_bug.  REM may be
  * NULL; QUOT may be NULL only for a negative ROUND with REM given.  */
 void
 _gcry_mpi_div (gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend,
                gcry_mpi_t divisor, int round)
 {
   if (round > 0)
     log_bug ("mpi rounding to ceiling not yet implemented\n");
   else if (round < 0)
     {
       if (!rem)
         _gcry_mpi_fdiv_q (quot, dividend, divisor);
       else if (!quot)
         _gcry_mpi_fdiv_r (rem, dividend, divisor);
       else
         _gcry_mpi_fdiv_qr (quot, rem, dividend, divisor);
     }
   else if (rem)
     _gcry_mpi_tdiv_qr (quot, rem, dividend, divisor);
   else
     {
       /* No remainder wanted: let it go into a scratch MPI.  */
       gcry_mpi_t scratch = mpi_alloc (mpi_get_nlimbs(quot));
 
       _gcry_mpi_tdiv_qr (quot, scratch, dividend, divisor);
       mpi_free (scratch);
     }
 }
diff --git a/mpi/mpi-gcd.c b/mpi/mpi-gcd.c
index 77ca05a6..9cf3062e 100644
--- a/mpi/mpi-gcd.c
+++ b/mpi/mpi-gcd.c
@@ -1,52 +1,52 @@
 /* mpi-gcd.c  -  MPI functions
  *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 
 /****************
  * Find the greatest common divisor G of XA and XB.  The inputs are
  * not modified; the computation works on copies with the sign cleared.
  * Return: true if G is 1, false in all other cases.
  */
 int
 _gcry_mpi_gcd (gcry_mpi_t g, gcry_mpi_t xa, gcry_mpi_t xb)
 {
     gcry_mpi_t x = mpi_copy(xa);
     gcry_mpi_t y = mpi_copy(xb);
     int gcd_is_one;
 
     x->sign = 0;
     y->sign = 0;
 
     /* TAOCP Vol II, 4.5.2, Algorithm A: replace (x, y) by (y, x mod y)
      * until y becomes zero.  G serves as scratch space meanwhile.  */
     while (mpi_cmp_ui (y, 0))
       {
 	_gcry_mpi_fdiv_r( g, x, y );
 	mpi_set(x,y);
 	mpi_set(y,g);
       }
     mpi_set(g, x);
 
     mpi_free(x);
     mpi_free(y);
 
     gcd_is_one = !mpi_cmp_ui( g, 1);
     return gcd_is_one;
 }
diff --git a/mpi/mpi-inline.c b/mpi/mpi-inline.c
index 39e22224..c6c66f3f 100644
--- a/mpi/mpi-inline.c
+++ b/mpi/mpi-inline.c
@@ -1,35 +1,35 @@
 /* mpi-inline.c
  *	Copyright (C) 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 /* put the inline functions as real functions into the lib */
 #define G10_MPI_INLINE_DECL
 
 #include "mpi-internal.h"
 
 /* always include the header because it is only
  * included by mpi-internal if __GCC__ is defined but we
  * need it here in all cases and the above definition
  * of the macro allows us to do so
  */
 #include "mpi-inline.h"
diff --git a/mpi/mpi-inline.h b/mpi/mpi-inline.h
index 94e2aec8..090e8a94 100644
--- a/mpi/mpi-inline.h
+++ b/mpi/mpi-inline.h
@@ -1,161 +1,161 @@
 /* mpi-inline.h  -  Internal to the Multi Precision Integers
  * Copyright (C) 1994, 1996, 1998, 1999,
  *               2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #ifndef G10_MPI_INLINE_H
 #define G10_MPI_INLINE_H
 
 /* Starting with gcc 4.3 "extern inline" conforms in c99 mode to the
    c99 semantics.  To keep the useful old semantics we use an
    attribute.  */
 #ifndef G10_MPI_INLINE_DECL
 # ifdef __GNUC_STDC_INLINE__
 #  define G10_MPI_INLINE_DECL  extern inline __attribute__ ((__gnu_inline__))
 # else
 #  define G10_MPI_INLINE_DECL  extern __inline__
 # endif
 #endif
 
 /* Add the single limb S2_LIMB to the S1_SIZE limbs at S1_PTR and store
  * the S1_SIZE result limbs at RES_PTR.  Returns 1 if a carry came out
  * of the most significant limb, 0 otherwise.  The first limb is read
  * unconditionally, so S1_SIZE is expected to be at least 1.  */
 G10_MPI_INLINE_DECL  mpi_limb_t
 _gcry_mpih_add_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 	       mpi_size_t s1_size, mpi_limb_t s2_limb)
 {
     mpi_limb_t x;
 
     x = *s1_ptr++;
     s2_limb += x;
     *res_ptr++ = s2_limb;
     if( s2_limb < x ) { /* sum is less than the left operand: handle carry */
 	/* S1_SIZE counts down while the carry is propagated.  */
 	while( --s1_size ) {
 	    x = *s1_ptr++ + 1;	/* add carry */
 	    *res_ptr++ = x;	/* and store */
 	    if( x )		/* not 0 (no overflow): we can stop */
 		goto leave;
 	}
 	return 1; /* return carry (size of s1 to small) */
     }
 
   leave:
     /* Both pointers are past the limbs handled so far and S1_SIZE - 1
      * limbs remain; they only need copying when source and result are
      * different arrays.  */
     if( res_ptr != s1_ptr ) { /* not the same variable */
 	mpi_size_t i;	       /* copy the rest */
 	for( i=0; i < s1_size-1; i++ )
 	    res_ptr[i] = s1_ptr[i];
     }
     return 0; /* no carry */
 }
 
 
 
 /* Add the S2_SIZE limbs at S2_PTR to the S1_SIZE limbs at S1_PTR and
  * store the result at RES_PTR; the carry of the last step is returned.
  * The low S2_SIZE limbs are added limb by limb, then the carry is run
  * through the remaining S1_SIZE - S2_SIZE limbs of S1.  */
 G10_MPI_INLINE_DECL mpi_limb_t
 _gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 			       mpi_ptr_t s2_ptr, mpi_size_t s2_size)
 {
     mpi_size_t rest = s1_size - s2_size;
     mpi_limb_t carry = 0;
 
     if( s2_size )
 	carry = _gcry_mpih_add_n( res_ptr, s1_ptr, s2_ptr, s2_size );
 
     if( !rest )
 	return carry;
 
     return _gcry_mpih_add_1( res_ptr + s2_size, s1_ptr + s2_size,
 			     rest, carry);
 }
 
 
 /* Subtract the single limb S2_LIMB from the S1_SIZE limbs at S1_PTR and
  * store the S1_SIZE result limbs at RES_PTR.  Returns 1 if a borrow
  * came out of the most significant limb, 0 otherwise.  The first limb
  * is read unconditionally, so S1_SIZE is expected to be at least 1.  */
 G10_MPI_INLINE_DECL mpi_limb_t
 _gcry_mpih_sub_1(mpi_ptr_t res_ptr,  mpi_ptr_t s1_ptr,
 	      mpi_size_t s1_size, mpi_limb_t s2_limb )
 {
     mpi_limb_t x;
 
     x = *s1_ptr++;
     s2_limb = x - s2_limb;
     *res_ptr++ = s2_limb;
     if( s2_limb > x ) { /* difference wrapped around: handle borrow */
 	/* S1_SIZE counts down while the borrow is propagated.  */
 	while( --s1_size ) {
 	    x = *s1_ptr++;
 	    *res_ptr++ = x - 1;	/* subtract borrow and store */
 	    if( x )		/* limb was not 0: the borrow is absorbed */
 		goto leave;
 	}
 	return 1; /* borrow out of the most significant limb */
     }
 
   leave:
     /* Both pointers are past the limbs handled so far and S1_SIZE - 1
      * limbs remain; they only need copying when source and result are
      * different arrays.  */
     if( res_ptr != s1_ptr ) {
 	mpi_size_t i;
 	for( i=0; i < s1_size-1; i++ )
 	    res_ptr[i] = s1_ptr[i];
     }
     return 0; /* no borrow */
 }
 
 
 
 /* Subtract the S2_SIZE limbs at S2_PTR from the S1_SIZE limbs at S1_PTR
  * and store the result at RES_PTR; the borrow of the last step is
  * returned.  The low S2_SIZE limbs are subtracted limb by limb, then
  * the borrow is run through the remaining S1_SIZE - S2_SIZE limbs.  */
 G10_MPI_INLINE_DECL   mpi_limb_t
 _gcry_mpih_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 				mpi_ptr_t s2_ptr, mpi_size_t s2_size)
 {
     mpi_size_t rest = s1_size - s2_size;
     mpi_limb_t borrow = 0;
 
     if( s2_size )
 	borrow = _gcry_mpih_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
 
     if( !rest )
 	return borrow;
 
     return _gcry_mpih_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
 			    rest, borrow);
 }
 
 /****************
  * Compare the SIZE limbs at OP1_PTR with the SIZE limbs at OP2_PTR,
  * starting at the most significant limb.
  * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
  */
 G10_MPI_INLINE_DECL int
 _gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size )
 {
     mpi_size_t idx = size;
 
     while( idx-- > 0 ) {
 	mpi_limb_t lhs = op1_ptr[idx];
 	mpi_limb_t rhs = op2_ptr[idx];
 
 	/* The first differing limb decides.  This can *not* be
 	 * simplified to
 	 *   op1_word - op2_word
 	 * since the difference of two limbs does not fit the signed
 	 * return type.  */
 	if( lhs != rhs )
 	    return (lhs > rhs) ? 1 : -1;
     }
     return 0;
 }
 
 
 #endif /*G10_MPI_INLINE_H*/
diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h
index 3ef4a205..79a6cce7 100644
--- a/mpi/mpi-internal.h
+++ b/mpi/mpi-internal.h
@@ -1,305 +1,305 @@
 /* mpi-internal.h  -  Internal to the Multi Precision Integers
  * Copyright (C) 1994, 1996, 1998, 2000, 2002,
  *               2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #ifndef G10_MPI_INTERNAL_H
 #define G10_MPI_INTERNAL_H
 
 #include "mpi-asm-defs.h"
 
 /* Select the limb type: the unsigned integer type whose size equals
  * BYTES_PER_MPI_LIMB, together with its signed counterpart.  The whole
  * selection is skipped when BITS_PER_MPI_LIMB is already defined.  */
 #ifndef BITS_PER_MPI_LIMB
 #if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_INT
   typedef unsigned int mpi_limb_t;
   typedef   signed int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG
   typedef unsigned long int mpi_limb_t;
   typedef   signed long int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG
   typedef unsigned long long int mpi_limb_t;
   typedef   signed long long int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_SHORT
   typedef unsigned short int mpi_limb_t;
   typedef   signed short int mpi_limb_signed_t;
 #else
 /* No integer type has the configured limb size: refuse to compile.  */
 #error BYTES_PER_MPI_LIMB does not match any C type
 #endif
 /* Limb width in bits, derived from the byte size (8 bits per byte).  */
 #define BITS_PER_MPI_LIMB    (8*BYTES_PER_MPI_LIMB)
 #endif /*BITS_PER_MPI_LIMB*/
 
 #include "mpi.h"
 
 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  * value which is good on most machines.  MPN_MUL_N_RECURSE compares
  * the operand size against it: sizes below the threshold go to
  * mul_n_basecase, all others to mul_n.  */
 
 /* tested 4, 16, 32 and 64, where 16 gave the best performance when
  * checking a 768 and a 1024 bit ElGamal signature.
  * (wk 22.12.97) */
 #ifndef KARATSUBA_THRESHOLD
 #define KARATSUBA_THRESHOLD 16
 #endif
 
 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
 /* An externally supplied smaller value is silently raised to 2.  */
 #if KARATSUBA_THRESHOLD < 2
 #undef KARATSUBA_THRESHOLD
 #define KARATSUBA_THRESHOLD 2
 #endif
 
 
 typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
 /* Number of limbs.  The type has to stay signed because loops such as
  * the one in MPN_COPY_DECR count downwards and stop on "_i >= 0".  */
 typedef int mpi_size_t;        /* (must be a signed type) */
 
 /* Absolute value of X.  The argument is parenthesized at every use so
  * that expressions such as ABS(a - b) expand correctly.  X is still
  * evaluated twice, so it must not have side effects.  */
 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 /* Smaller / larger of two values; each argument is evaluated twice.  */
 #define MIN(l,o) ((l) < (o) ? (l) : (o))
 #define MAX(h,i) ((h) > (i) ? (h) : (i))
 /* Call mpi_resize (A, B) when A has fewer than B limbs allocated.  */
 #define RESIZE_IF_NEEDED(a,b) \
     do {			   \
 	if( (a)->alloced < (b) )   \
 	    mpi_resize((a), (b));  \
     } while(0)
 /* Same as RESIZE_IF_NEEDED, but the test is made against the number of
  * limbs in use (nlimbs) instead of the allocated size.
  * NOTE(review): nothing is cleared here explicitly; presumably
  * mpi_resize() zeroes the limbs it adds -- confirm.  */
 #define RESIZE_AND_CLEAR_IF_NEEDED(a,b) \
     do {			   \
 	if( (a)->nlimbs < (b) )   \
 	    mpi_resize((a), (b));  \
     } while(0)
 
 /* Copy N limbs from S to D.  The limbs are copied from index 0
  * upwards.  N is evaluated on every loop iteration.  */
 #define MPN_COPY( d, s, n) \
     do {				\
 	mpi_size_t _i;			\
 	for( _i = 0; _i < (n); _i++ )	\
 	    (d)[_i] = (s)[_i];		\
     } while(0)
 
 /* Copy N limbs from S to D with increasing index (same loop as
  * MPN_COPY); the name makes the copy direction explicit.  */
 #define MPN_COPY_INCR( d, s, n) 	\
     do {				\
 	mpi_size_t _i;			\
 	for( _i = 0; _i < (n); _i++ )	\
 	    (d)[_i] = (s)[_i];		\
     } while (0)
 
 /* Copy N limbs from S to D starting with the highest index and
  * working downwards, which keeps the copy correct when D overlaps S
  * at a higher address.  */
 #define MPN_COPY_DECR( d, s, n ) \
     do {				\
 	mpi_size_t _i;			\
 	for( _i = (n)-1; _i >= 0; _i--) \
 	   (d)[_i] = (s)[_i];		\
     } while(0)
 
 /* Zero N limbs at D */
 /* The counter is a plain int, which is also what mpi_size_t is
  * typedef'ed to in this header.  Nothing is written when N <= 0.  */
 #define MPN_ZERO(d, n) \
     do {				  \
 	int  _i;			  \
 	for( _i = 0; _i < (n); _i++ )  \
 	    (d)[_i] = 0;		    \
     } while (0)
 
 /* Strip leading zero limbs: decrement N (which must be an lvalue)
  * until limb D[N-1] is non-zero or N has reached 0.  */
 #define MPN_NORMALIZE(d, n)  \
     do {		       \
 	while( (n) > 0 ) {     \
 	    if( (d)[(n)-1] ) \
 		break;	       \
 	    (n)--;	       \
 	}		       \
     } while(0)
 
 /* Variant of MPN_NORMALIZE without the N > 0 check.  The loop only
  * stops at a non-zero limb, so the caller must guarantee that D holds
  * at least one; otherwise it reads below D[0].  */
 #define MPN_NORMALIZE_NOT_ZERO(d, n) \
     do {				    \
 	for(;;) {			    \
 	    if( (d)[(n)-1] )		    \
 		break;			    \
 	    (n)--;			    \
 	}				    \
     } while(0)
 
 /* Multiply two SIZE-limb operands UP and VP into PRODP, choosing the
  * routine by size: mul_n_basecase below KARATSUBA_THRESHOLD, mul_n
  * (which additionally gets the scratch area TSPACE) otherwise.
  * Neither routine is declared in this header; the macro relies on
  * both being in scope where it is expanded.  */
 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
     do {						\
 	if( (size) < KARATSUBA_THRESHOLD )		\
 	    mul_n_basecase (prodp, up, vp, size);	\
 	else						\
 	    mul_n (prodp, up, vp, size, tspace);	\
     } while (0)
 
 
 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
  * limb not larger than (2**(2*BITS_PER_MPI_LIMB))/D - (2**BITS_PER_MPI_LIMB).
  * If this would yield overflow, DI should be the largest possible number
  * (i.e., only ones).  For correct operation, the most significant bit of D
  * has to be set.  Put the quotient in Q and the remainder in R.
  *
  * The macro first estimates the quotient from NH and DI, computes the
  * remainder for that estimate and then corrects both with at most
  * three conditional "subtract D, increment quotient" steps.  It uses
  * umul_ppmm and sub_ddmmss, which must be in scope at the point of
  * expansion (presumably provided by longlong.h -- confirm).
  */
 #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
     do {							    \
         mpi_limb_t _ql GCC_ATTR_UNUSED;                               \
 	mpi_limb_t _q, _r;                                          \
 	mpi_limb_t _xh, _xl;					    \
 	umul_ppmm (_q, _ql, (nh), (di));			    \
 	_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */  \
 	umul_ppmm (_xh, _xl, _q, (d));				    \
 	sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);		    \
 	if( _xh ) {						    \
 	    sub_ddmmss (_xh, _r, _xh, _r, 0, (d));		    \
 	    _q++;						    \
 	    if( _xh) {						    \
 		sub_ddmmss (_xh, _r, _xh, _r, 0, (d));		    \
 		_q++;						    \
 	    }							    \
 	}							    \
 	if( _r >= (d) ) {					    \
 	    _r -= (d);						    \
 	    _q++;						    \
 	}							    \
 	(r) = _r;						    \
 	(q) = _q;						    \
     } while (0)
 
 
 /*-- mpiutil.c --*/
 #define mpi_alloc_limb_space(n,f)  _gcry_mpi_alloc_limb_space((n),(f))
 mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned nlimbs, int sec );
 void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs );
 void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned nlimbs );
 
 /*-- mpi-bit.c --*/
 #define mpi_rshift_limbs(a,n)  _gcry_mpi_rshift_limbs ((a), (n))
 #define mpi_lshift_limbs(a,n)  _gcry_mpi_lshift_limbs ((a), (n))
 
 void _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count );
 void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count );
 
 
 /*-- mpih-add.c --*/
 mpi_limb_t _gcry_mpih_add_1(mpi_ptr_t res_ptr,  mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb );
 mpi_limb_t _gcry_mpih_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			  mpi_ptr_t s2_ptr,  mpi_size_t size);
 mpi_limb_t _gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
 /*-- mpih-sub.c --*/
 mpi_limb_t _gcry_mpih_sub_1( mpi_ptr_t res_ptr,  mpi_ptr_t s1_ptr,
 			  mpi_size_t s1_size, mpi_limb_t s2_limb );
 mpi_limb_t _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			  mpi_ptr_t s2_ptr, mpi_size_t size);
 mpi_limb_t _gcry_mpih_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
 /*-- mpih-cmp.c --*/
 int _gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size );
 
 /*-- mpih-mul.c --*/
 
 /* Context passed to _gcry_mpih_mul_karatsuba_case and released with
  * _gcry_mpih_release_karatsuba_ctx.
  * NOTE(review): the field descriptions below are inferred from the
  * field names and types only -- confirm against mpih-mul.c.  */
 struct karatsuba_ctx {
     struct karatsuba_ctx *next;    /* presumably the next context in a chain */
     mpi_ptr_t tspace;              /* limb buffer "tspace" */
     unsigned int tspace_nlimbs;    /* presumably limbs allocated for tspace */
     mpi_size_t tspace_size;        /* presumably the size tspace was set up for */
     mpi_ptr_t tp;                  /* limb buffer "tp" */
     unsigned int tp_nlimbs;        /* presumably limbs allocated for tp */
     mpi_size_t tp_size;            /* presumably the size tp was set up for */
 };
 
 void _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx );
 
 mpi_limb_t _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			     mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			     mpi_size_t s1_size, mpi_limb_t s2_limb);
 void _gcry_mpih_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
 						   mpi_size_t size);
 mpi_limb_t _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 					 mpi_ptr_t vp, mpi_size_t vsize);
 void _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size );
 void _gcry_mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
 						mpi_ptr_t tspace);
 
 void _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp,
 				 mpi_ptr_t up, mpi_size_t usize,
 				 mpi_ptr_t vp, mpi_size_t vsize,
 				 struct karatsuba_ctx *ctx );
 
 
 /*-- mpih-mul_1.c (or xxx/cpu/ *.S) --*/
 mpi_limb_t _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			  mpi_size_t s1_size, mpi_limb_t s2_limb);
 
 /*-- mpih-div.c --*/
 mpi_limb_t _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
 						 mpi_limb_t divisor_limb);
 mpi_limb_t _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
 			   mpi_ptr_t np, mpi_size_t nsize,
 			   mpi_ptr_t dp, mpi_size_t dsize);
 mpi_limb_t _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
 			     mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
 			     mpi_limb_t divisor_limb);
 
 /*-- mpih-shift.c --*/
 mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
 							   unsigned cnt);
 mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
 							   unsigned cnt);
 
 /*-- mpih-const-time.c --*/
 #define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o))
 #define mpih_add_n_cond(w,u,v,s,o) _gcry_mpih_add_n_cond ((w),(u),(v),(s),(o))
 #define mpih_sub_n_cond(w,u,v,s,o) _gcry_mpih_sub_n_cond ((w),(u),(v),(s),(o))
 #define mpih_swap_cond(u,v,s,o) _gcry_mpih_swap_cond ((u),(v),(s),(o))
 #define mpih_abs_cond(w,u,s,o) _gcry_mpih_abs_cond ((w),(u),(s),(o))
 #define mpih_mod(v,vs,u,us) _gcry_mpih_mod ((v),(vs),(u),(us))
 
 void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
                           unsigned long op_enable);
 mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
                                   mpi_size_t usize, unsigned long op_enable);
 mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
                                   mpi_size_t usize, unsigned long op_enable);
 void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize,
                            unsigned long op_enable);
 void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up,
                           mpi_size_t usize, unsigned long op_enable);
 mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize,
                           mpi_ptr_t up, mpi_size_t usize);
 int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v);
 
 
 /* Define stuff for longlong.h.  */
 /* The word type is the limb type and the word size is the limb width.  */
 #define W_TYPE_SIZE BITS_PER_MPI_LIMB
   typedef mpi_limb_t   UWtype;
   typedef unsigned int UHWtype;
 #if defined (__GNUC__)
   /* With GCC the fixed-width types are requested through the machine
    * mode attribute (QI, SI and DI modes).  */
   typedef unsigned int UQItype	  __attribute__ ((mode (QI)));
   typedef	   int SItype	  __attribute__ ((mode (SI)));
   typedef unsigned int USItype	  __attribute__ ((mode (SI)));
   typedef	   int DItype	  __attribute__ ((mode (DI)));
   typedef unsigned int UDItype	  __attribute__ ((mode (DI)));
 #else
   /* Other compilers get plain C types; DItype and UDItype are not
    * defined in this branch.  */
   typedef unsigned char UQItype;
   typedef	   long SItype;
   typedef unsigned long USItype;
 #endif
 
 #ifdef __GNUC__
 #include "mpi-inline.h"
 #endif
 
 #endif /*G10_MPI_INTERNAL_H*/
diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c
index 43bd641f..bbd102e3 100644
--- a/mpi/mpi-mpow.c
+++ b/mpi/mpi-mpow.c
@@ -1,223 +1,223 @@
 /* mpi-mpow.c  -  MPI functions
  *	Copyright (C) 1998, 1999, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 #include "mpi-internal.h"
 #include "longlong.h"
 #include "g10lib.h"
 
 
 /* Barrett is slower than the classical way.  It can be tweaked by
  * using partial multiplications
  */
 /*#define USE_BARRETT*/
 
 
 
 /* With USE_BARRETT the three Barrett helpers below are real functions.
  * Without it, barrett_mulm is a macro that forwards only W, U, V and M
  * to _gcry_mpi_mulm; the Y, K, R1 and R2 arguments are dropped
  * textually, so the names passed for them need not be declared.  */
 #ifdef USE_BARRETT
 static void barrett_mulm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 );
 static gcry_mpi_t init_barrett( gcry_mpi_t m, int *k, gcry_mpi_t *r1, gcry_mpi_t *r2 );
 static int calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2  );
 #else
 #define barrett_mulm( w, u, v, m, y, k, r1, r2 ) _gcry_mpi_mulm( (w), (u), (v), (m) )
 #endif
 
 
 /* Build the table index for round I of T: take bit number T-I of each
  * of the K exponents in EXPARRAY and pack them into an integer, with
  * the bit of EXPARRAY[0] ending up as the least significant bit.  */
 static int
 build_index( gcry_mpi_t *exparray, int k, int i, int t )
 {
     const int bitno = t - i;
     int idx = 0;
     int j = k;
 
     /* Start with the last exponent so that shifting moves its bit to
      * the most significant position of the index.  */
     while( j-- > 0 )
         idx = (idx << 1) | (mpi_test_bit( exparray[j], bitno ) ? 1 : 0);
 
     return idx;
 }
 
 /****************
  * RES = (BASE[0] ^ EXP[0]) *  (BASE[1] ^ EXP[1]) * ... * mod M
  *
  * BASEARRAY and EXPARRAY are arrays terminated by a NULL entry and
  * must have the same number of elements: at least one and fewer
  * than 10 (a table with 2^k entries is allocated).  At least one
  * exponent must have a non-zero bit length.  All of this is
  * enforced with gcry_assert.
  *
  * The exponent bits are processed from the most significant one
  * downwards: every round squares the running result and multiplies
  * it by the product of those bases whose exponent has the current
  * bit set.  These products are computed on first use and cached in
  * the table G.
  */
 void
 _gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, gcry_mpi_t *exparray, gcry_mpi_t m)
 {
     int k;	/* number of elements */
     int t;	/* bit size of largest exponent */
     int i, j, idx;
     gcry_mpi_t *G;	/* table with precomputed values of size 2^k */
     gcry_mpi_t tmp;
 #ifdef USE_BARRETT
     gcry_mpi_t barrett_y, barrett_r1, barrett_r2;
     int barrett_k;
 #endif
 
     /* Count the bases up to the terminating NULL entry.  */
     for(k=0; basearray[k]; k++ )
 	;
     gcry_assert(k);
     /* Count the exponents and find the largest bit length; TMP is
      * only used as the loop cursor here.  */
     for(t=0, i=0; (tmp=exparray[i]); i++ ) {
 	/*log_mpidump("exp: ", tmp );*/
 	j = mpi_get_nbits(tmp);
 	if( j > t )
 	    t = j;
     }
     /*log_mpidump("mod: ", m );*/
     gcry_assert (i==k);
     gcry_assert (t);
     gcry_assert (k < 10);
 
     /* One zero-initialized (NULL) slot per bit pattern; slots are
      * filled lazily in the loop below.  */
     G = xcalloc( (1<<k) , sizeof *G );
 #ifdef USE_BARRETT
     barrett_y = init_barrett( m, &barrett_k, &barrett_r1, &barrett_r2 );
 #endif
     /* and calculate */
     tmp =  mpi_alloc( mpi_get_nlimbs(m)+1 );
     mpi_set_ui( res, 1 );
     for(i = 1; i <= t; i++ ) {
 	/* tmp = res^2 mod m */
 	barrett_mulm(tmp, res, res, m, barrett_y, barrett_k,
 				       barrett_r1, barrett_r2 );
 	/* Collect bit t-i of every exponent into idx.  */
 	idx = build_index( exparray, k, i, t );
 	gcry_assert (idx >= 0 && idx < (1<<k));
 	if( !G[idx] ) {
 	    /* First use of this bit pattern: compute its table entry.  */
 	    if( !idx )
 		 G[0] = mpi_alloc_set_ui( 1 );
 	    else {
 		/* Product (mod m) of all bases selected by idx.  */
 		for(j=0; j < k; j++ ) {
 		    if( (idx & (1<<j) ) ) {
 			if( !G[idx] )
 			    G[idx] = mpi_copy( basearray[j] );
 			else
 			    barrett_mulm( G[idx], G[idx], basearray[j],
 					       m, barrett_y, barrett_k, barrett_r1, barrett_r2	);
 		    }
 		}
 		/* Fallback in case the loop left the slot empty.  */
 		if( !G[idx] )
 		    G[idx] = mpi_alloc(0);
 	    }
 	}
 	/* res = tmp * G[idx] mod m */
 	barrett_mulm(res, tmp, G[idx], m, barrett_y, barrett_k, barrett_r1, barrett_r2	);
     }
 
     /* cleanup */
     mpi_free(tmp);
 #ifdef USE_BARRETT
     mpi_free(barrett_y);
     mpi_free(barrett_r1);
     mpi_free(barrett_r2);
 #endif
     /* Every slot is passed to mpi_free, including those never filled.  */
     for(i=0; i < (1<<k); i++ )
 	mpi_free(G[i]);
     xfree(G);
 }
 
 
 
 #ifdef USE_BARRETT
 /* W = U * V mod M.  The product is reduced with calc_barrett (using
  * the precomputed Y and K and the scratch MPIs R1 and R2); when that
  * reports that it cannot handle the value, mpi_fdiv_r is used.  */
 static void
 barrett_mulm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2	)
 {
     int not_reduced;
 
     mpi_mul(w, u, v);
     not_reduced = calc_barrett( w, w, m, y, k, r1, r2 );
     if( not_reduced ) {
         /* Fall back to an ordinary division.  */
         mpi_fdiv_r( w, w, m );
     }
 }
 
 /****************
  * Barrett precalculation: y = floor(b^(2k) / m)
  *
  * M is normalized first.  The limb count of M is stored at K and two
  * scratch MPIs of 2*K+1 limbs each are stored at R1 and R2.  Returns
  * the newly allocated y.
  */
 static gcry_mpi_t
 init_barrett( gcry_mpi_t m, int *k, gcry_mpi_t *r1, gcry_mpi_t *r2 )
 {
     gcry_mpi_t y;
 
     mpi_normalize( m );
     *k = mpi_get_nlimbs( m );
 
     /* y = b^(2k), built as 1 shifted left by 2k limbs ...  */
     y = mpi_alloc( *k + 1 );
     mpi_set_ui( y, 1 );
     mpi_lshift_limbs( y, 2 * *k );
     /* ... and then divided by m.  */
     mpi_fdiv_q( y, y, m );
 
     *r1 = mpi_alloc( 2* *k + 1 );
     *r2 = mpi_alloc( 2* *k + 1 );
     return y;
 }
 
 /****************
  * Barrett reduction: We assume that these conditions are met:
  * Given x =(x_2k-1 ...x_0)_b
  *	 m =(m_k-1 ....m_0)_b	  with m_k-1 != 0
  * Output r = x mod m
  * Before using this function init_barrett must be used to calculate
  * y and k.  R1 and R2 are scratch MPIs which get overwritten; X is
  * normalized in place.
  * Returns: false = no error
  *	    true = can't perform Barrett reduction (X has more than
  *		   2*K limbs); R is left untouched in that case.
  */
 static int
 calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 )
 {
     mpi_normalize( x );
     if( mpi_get_nlimbs(x) > 2*k )
 	return 1; /* can't do it */
 
     /* 1. q1 = floor( x / b^k-1)
      *	  q2 = q1 * y
      *	  q3 = floor( q2 / b^k+1 )
      * Actually, we don't need qx, we can work direct on r2
      */
     mpi_set( r2, x );
     mpi_rshift_limbs( r2, k-1 );
     mpi_mul( r2, r2, y );
     mpi_rshift_limbs( r2, k+1 );
 
     /* 2. r1 = x mod b^k+1
      *	  r2 = q3 * m mod b^k+1
      *	  r  = r1 - r2
      * 3. if r < 0 then  r = r + b^k+1
      */
     mpi_set( r1, x );
     if( r1->nlimbs > k+1 ) /* quick modulo operation */
 	r1->nlimbs = k+1;
     mpi_mul( r2, r2, m );
     if( r2->nlimbs > k+1 ) /* quick modulo operation */
 	r2->nlimbs = k+1;
     mpi_sub( r, r1, r2 );
 
     if( mpi_has_sign (r) ) {
 	gcry_mpi_t tmp;
 
 	/* tmp = b^(k+1), built as 1 shifted left by k+1 limbs.  */
 	tmp = mpi_alloc( k + 2 );
 	mpi_set_ui( tmp, 1 );
 	mpi_lshift_limbs( tmp, k+1 );
 	mpi_add( r, r, tmp );
 	mpi_free(tmp);
     }
 
     /* 4. while r >= m do r = r - m */
     while( mpi_cmp( r, m ) >= 0 )
 	mpi_sub( r, r, m );
 
     return 0;
 }
 #endif /* USE_BARRETT */
diff --git a/mpi/mpi-mul.c b/mpi/mpi-mul.c
index 4f4d7096..60f1ca48 100644
--- a/mpi/mpi-mul.c
+++ b/mpi/mpi-mul.c
@@ -1,212 +1,212 @@
 /* mpi-mul.c  -  MPI functions
  * Copyright (C) 1994, 1996, 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 
 
 /* PROD = MULT * SMALL_MULT.  SMALL_MULT is converted to a single limb
  * (mpi_limb_t) for the multiplication.  The product is zero (no limbs,
  * sign cleared) when either factor is zero; otherwise it takes the
  * sign of MULT.  */
 void
 _gcry_mpi_mul_ui (gcry_mpi_t prod, gcry_mpi_t mult, unsigned long small_mult)
 {
     /* Read MULT's fields before PROD is touched.  */
     const mpi_size_t nlimbs = mult->nlimbs;
     const int sign = mult->sign;
     mpi_size_t result_size = nlimbs;
     mpi_limb_t carry;
 
     if( nlimbs == 0 || small_mult == 0 ) {
         prod->nlimbs = 0;
         prod->sign = 0;
         return;
     }
 
     /* Reserve one limb more than MULT has, for a possible carry.  */
     if( prod->alloced < nlimbs + 1 )
         mpi_resize( prod, nlimbs + 1 );
 
     carry = _gcry_mpih_mul_1( prod->d, mult->d, nlimbs,
                               (mpi_limb_t)small_mult );
     if( carry )
         prod->d[result_size++] = carry;
 
     prod->nlimbs = result_size;
     prod->sign = sign;
 }
 
 
 /* W = U * 2^CNT, i.e. U shifted left by CNT bits.  The shift is split
  * into whole limbs and a remaining bit count below
  * BITS_PER_MPI_LIMB.  W takes the sign of U.  */
 void
 _gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt)
 {
     /* Read U's fields before W is touched.  */
     const mpi_size_t usize = u->nlimbs;
     const int usign = u->sign;
     const mpi_size_t limb_cnt = cnt / BITS_PER_MPI_LIMB;
     const unsigned long bit_cnt = cnt % BITS_PER_MPI_LIMB;
     mpi_size_t wsize;
     mpi_ptr_t wp;
 
     if( usize == 0 ) {
         w->nlimbs = 0;
         w->sign = 0;
         return;
     }
 
     /* Reserve one extra limb for bits shifted out of the top limb.  */
     wsize = usize + limb_cnt;
     if( w->alloced < wsize + 1 )
         mpi_resize( w, wsize + 1 );
     wp = w->d;
 
     if( bit_cnt ) {
         mpi_limb_t carry = _gcry_mpih_lshift( wp + limb_cnt, u->d,
                                               usize, bit_cnt );
         if( carry )
             wp[wsize++] = carry;
     }
     else {
         /* Whole-limb shift: copy downwards from the top.  */
         MPN_COPY_DECR( wp + limb_cnt, u->d, usize );
     }
 
     /* Zero all whole limbs at the low end.  This is done only now, after
      * the limbs of U have been moved, so that U == W keeps working.  */
     MPN_ZERO( wp, limb_cnt );
 
     w->nlimbs = wsize;
     w->sign = usign;
 }
 
 
 /* W = U * V.  W may be the same object as U and/or V.  The sign of
  * the result is the XOR of the operand signs.  If U or V lives in
  * secure memory but W does not, the product is computed in a
  * temporary secure limb buffer and copied to normal memory afterwards.  */
 void
 _gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
     mpi_size_t usize, vsize, wsize;
     mpi_ptr_t up, vp, wp;
     mpi_limb_t cy;
     int usign, vsign, usecure, vsecure, sign_product;
     int assign_wp=0;	/* 0 = result is in w->d, 1 = new buffer to
 			 * assign, 2 = secure buffer to copy back */
     mpi_ptr_t tmp_limb=NULL;	/* temporary copy of an operand, if any */
     unsigned int tmp_limb_nlimbs = 0;
 
     /* From here on "u" always denotes the operand with more limbs.  */
     if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
 	usize = v->nlimbs;
 	usign = v->sign;
 	usecure = mpi_is_secure(v);
 	up    = v->d;
 	vsize = u->nlimbs;
 	vsign = u->sign;
 	vsecure = mpi_is_secure(u);
 	vp    = u->d;
     }
     else {
 	usize = u->nlimbs;
 	usign = u->sign;
 	usecure = mpi_is_secure(u);
 	up    = u->d;
 	vsize = v->nlimbs;
 	vsign = v->sign;
 	vsecure = mpi_is_secure(v);
 	vp    = v->d;
     }
     sign_product = usign ^ vsign;
     wp = w->d;
 
     /* Ensure W has space enough to store the result.  */
     wsize = usize + vsize;
     if ( !mpi_is_secure (w) && (mpi_is_secure (u) || mpi_is_secure (v)) ) {
         /* w is not allocated in secure space but u or v is.  To make sure
          * that no temporary results are stored in w, we temporarily use
          * a newly allocated limb space for w */
         wp = mpi_alloc_limb_space( wsize, 1 );
         assign_wp = 2; /* mark it as 2 so that we can later copy it back to
                         * normal memory */
     }
     else if( w->alloced < wsize ) {
 	/* W is too small.  If it shares its limbs with an operand it
 	 * must not be resized here; use a fresh buffer instead.  */
 	if( wp == up || wp == vp ) {
 	    wp = mpi_alloc_limb_space( wsize, mpi_is_secure(w) );
 	    assign_wp = 1;
 	}
 	else {
 	    mpi_resize(w, wsize );
 	    wp = w->d;
 	}
     }
     else { /* Make U and V not overlap with W.	*/
 	if( wp == up ) {
 	    /* W and U are identical.  Allocate temporary space for U.	*/
             tmp_limb_nlimbs = usize;
 	    up = tmp_limb = mpi_alloc_limb_space( usize, usecure  );
 	    /* Is V identical too?  Keep it identical with U.  */
 	    if( wp == vp )
 		vp = up;
 	    /* Copy to the temporary space.  */
 	    MPN_COPY( up, wp, usize );
 	}
 	else if( wp == vp ) {
 	    /* W and V are identical.  Allocate temporary space for V.	*/
             tmp_limb_nlimbs = vsize;
 	    vp = tmp_limb = mpi_alloc_limb_space( vsize, vsecure );
 	    /* Copy to the temporary space.  */
 	    MPN_COPY( vp, wp, vsize );
 	}
     }
 
     /* V is the shorter operand, so no limbs in V means the product
      * is zero.  */
     if( !vsize )
 	wsize = 0;
     else {
 	/* The result has one limb less when the returned limb is
 	 * zero (presumably the most significant product limb --
 	 * confirm against _gcry_mpih_mul).  */
 	cy = _gcry_mpih_mul( wp, up, usize, vp, vsize );
 	wsize -= cy? 0:1;
     }
 
     if( assign_wp ) {
         if (assign_wp == 2) {
             /* copy the temp wp from secure memory back to normal memory */
 	    mpi_ptr_t tmp_wp = mpi_alloc_limb_space (wsize, 0);
 	    MPN_COPY (tmp_wp, wp, wsize);
             _gcry_mpi_free_limb_space (wp, 0);
             wp = tmp_wp;
         }
 	_gcry_mpi_assign_limb_space( w, wp, wsize );
     }
     w->nlimbs = wsize;
     w->sign = sign_product;
     if( tmp_limb )
 	_gcry_mpi_free_limb_space (tmp_limb, tmp_limb_nlimbs);
 }
 
 
 /* W = (U * V) mod M.  The product is formed in W with mpi_mul and
  * then reduced in place with _gcry_mpi_tdiv_r.
  * NOTE(review): the "tdiv" name suggests a truncating division, in
  * which case a negative product would yield a negative remainder --
  * confirm against the division code.  */
 void
 _gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
   mpi_mul (w, u, v);
   _gcry_mpi_tdiv_r (w, w, m);
 }
diff --git a/mpi/mpi-scan.c b/mpi/mpi-scan.c
index e27f7faa..a93bd607 100644
--- a/mpi/mpi-scan.c
+++ b/mpi/mpi-scan.c
@@ -1,130 +1,130 @@
 /* mpi-scan.c  -  MPI functions
  *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 /****************
  * Scan through an mpi and return byte for byte. a -1 is returned to indicate
  * the end of the mpi. Scanning is done from the lsb to the msb, returned
  * values are in the range of 0 .. 255.
  *
  * FIXME: This code is VERY ugly!
  */
 /* int */
 /* _gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx ) */
 /* { */
 /*     int i, j; */
 /*     unsigned n; */
 /*     mpi_ptr_t ap; */
 /*     mpi_limb_t limb; */
 
 /*     ap = a->d; */
 /*     for(n=0,i=0; i < a->nlimbs; i++ ) { */
 /* 	limb = ap[i]; */
 /* 	for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */
 /* 	    if( n == idx ) */
 /* 		return (limb >> j*8) & 0xff; */
 /*     } */
 /*     return -1; */
 /* } */
 
 
 /****************
  * Put a value at position IDX into A. idx counts from lsb to msb
  */
 /* void */
 /* _gcry_mpi_putbyte( gcry_mpi_t a, unsigned idx, int xc ) */
 /* { */
 /*     int i, j; */
 /*     unsigned n; */
 /*     mpi_ptr_t ap; */
 /*     mpi_limb_t limb, c; */
 
 /*     c = xc & 0xff; */
 /*     ap = a->d; */
 /*     for(n=0,i=0; i < a->alloced; i++ ) { */
 /* 	limb = ap[i]; */
 /* 	for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */
 /* 	    if( n == idx ) { */
 /* 	      #if BYTES_PER_MPI_LIMB == 4 */
 /* 		if( j == 0 ) */
 /* 		    limb = (limb & 0xffffff00) | c; */
 /* 		else if( j == 1 ) */
 /* 		    limb = (limb & 0xffff00ff) | (c<<8); */
 /* 		else if( j == 2 ) */
 /* 		    limb = (limb & 0xff00ffff) | (c<<16); */
 /* 		else */
 /* 		    limb = (limb & 0x00ffffff) | (c<<24); */
 /* 	      #elif BYTES_PER_MPI_LIMB == 8 */
 /* 		if( j == 0 ) */
 /* 		    limb = (limb & 0xffffffffffffff00) | c; */
 /* 		else if( j == 1 ) */
 /* 		    limb = (limb & 0xffffffffffff00ff) | (c<<8); */
 /* 		else if( j == 2 ) */
 /* 		    limb = (limb & 0xffffffffff00ffff) | (c<<16); */
 /* 		else if( j == 3 ) */
 /* 		    limb = (limb & 0xffffffff00ffffff) | (c<<24); */
 /* 		else if( j == 4 ) */
 /* 		    limb = (limb & 0xffffff00ffffffff) | (c<<32); */
 /* 		else if( j == 5 ) */
 /* 		    limb = (limb & 0xffff00ffffffffff) | (c<<40); */
 /* 		else if( j == 6 ) */
 /* 		    limb = (limb & 0xff00ffffffffffff) | (c<<48); */
 /* 		else */
 /* 		    limb = (limb & 0x00ffffffffffffff) | (c<<56); */
 /* 	      #else */
 /* 		 #error please enhance this function, its ugly - i know. */
 /* 	      #endif */
 /* 		if( a->nlimbs <= i ) */
 /* 		    a->nlimbs = i+1; */
 /* 		ap[i] = limb; */
 /* 		return; */
 /* 	    } */
 /*     } */
 /*     abort(); /\* index out of range *\/ */
 /* } */
 
 
 /****************
  * Count the number of zero bits at the low end of A.
  * If A has no non-zero limb (including the case of no limbs at all),
  * the result is the number of limbs times BITS_PER_MPI_LIMB.
  */
 unsigned
 _gcry_mpi_trailing_zeros( gcry_mpi_t a )
 {
     unsigned int idx;
     unsigned int zeros = 0;
 
     /* Every all-zero limb contributes a full limb width; the first
      * non-zero limb contributes its own trailing zero bits and ends
      * the scan.  */
     for( idx = 0; idx < a->nlimbs; idx++, zeros += BITS_PER_MPI_LIMB ) {
         mpi_limb_t limb = a->d[idx];
 
         if( limb ) {
             unsigned int low_zeros;
 
             count_trailing_zeros( low_zeros, limb );
             return zeros + low_zeros;
         }
     }
     return zeros;
 }
diff --git a/mpi/mpih-div.c b/mpi/mpih-div.c
index 57c1b584..0f3849d6 100644
--- a/mpi/mpih-div.c
+++ b/mpi/mpih-div.c
@@ -1,532 +1,532 @@
 /* mpih-div.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1998, 2000,
  *               2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 
 #ifndef UMUL_TIME
 #define UMUL_TIME 1
 #endif
 #ifndef UDIV_TIME
 #define UDIV_TIME UMUL_TIME
 #endif
 
 /* FIXME: We should be using invert_limb (or invert_normalized_limb)
  * here (not udiv_qrnnd).
  */
 
 /* Return the remainder of dividing the DIVIDEND_SIZE limbs at
  * DIVIDEND_PTR by the single limb DIVISOR_LIMB.  No quotient is
  * stored: the quotient output of every division step is written to
  * the local DUMMY and discarded.  Returns 0 if DIVIDEND_SIZE is 0.
  *
  * Two strategies are selected by the UDIV_TIME/UMUL_TIME cost macros:
  * division via a pre-computed inverse (UDIV_QRNND_PREINV) or plain
  * udiv_qrnnd.  Each strategy has a variant for a divisor that first
  * has to be shifted left ("normalized") and one for a divisor that is
  * used as is.
  */
 mpi_limb_t
 _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
 				      mpi_limb_t divisor_limb)
 {
     mpi_size_t i;
     mpi_limb_t n1, n0, r;
     mpi_limb_t dummy GCC_ATTR_UNUSED;  /* Sink for unused quotient limbs. */
 
     /* Botch: Should this be handled at all?  Rely on callers?	*/
     if( !dividend_size )
 	return 0;
 
     /* If multiplication is much faster than division, and the
      * dividend is large, pre-invert the divisor, and use
      * only multiplications in the inner loop.
      *
      * This test should be read:
      *	 Does it ever help to use udiv_qrnnd_preinv?
      *	   && Does what we save compensate for the inversion overhead?
      */
     if( UDIV_TIME > (2 * UMUL_TIME + 6)
 	&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
 	int normalization_steps;
 
 	count_leading_zeros( normalization_steps, divisor_limb );
 	if( normalization_steps ) {
 	    mpi_limb_t divisor_limb_inverted;
 
 	    divisor_limb <<= normalization_steps;
 
 	    /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
 	     * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
 	     * most significant bit (with weight 2**N) implicit.
 	     *
 	     * Special case for DIVISOR_LIMB == 100...000.
 	     */
 	    if( !(divisor_limb << 1) )
 		divisor_limb_inverted = ~(mpi_limb_t)0;
 	    else
 		udiv_qrnnd(divisor_limb_inverted, dummy,
 			   -divisor_limb, 0, divisor_limb);
 
 	    /* The bits shifted out of the top limb by the normalization
 	     * form the initial remainder.  */
 	    n1 = dividend_ptr[dividend_size - 1];
 	    r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
 
 	    /* Possible optimization:
 	     * if (r == 0
 	     * && divisor_limb > ((n1 << normalization_steps)
 	     *		       | (dividend_ptr[dividend_size - 2] >> ...)))
 	     * ...one division less...
 	     */
 	    /* Each step divides R together with one limb of the shifted
 	     * dividend; that limb is assembled from two adjacent source
 	     * limbs N1 (upper) and N0 (lower).  */
 	    for( i = dividend_size - 2; i >= 0; i--) {
 		n0 = dividend_ptr[i];
 		UDIV_QRNND_PREINV(dummy, r, r,
 				   ((n1 << normalization_steps)
 			  | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
 			  divisor_limb, divisor_limb_inverted);
 		n1 = n0;
 	    }
 	    /* Final step: the lowest limb has only zero bits shifted in.  */
 	    UDIV_QRNND_PREINV(dummy, r, r,
 			      n1 << normalization_steps,
 			      divisor_limb, divisor_limb_inverted);
 	    /* Undo the normalization shift on the remainder.  */
 	    return r >> normalization_steps;
 	}
 	else {
 	    mpi_limb_t divisor_limb_inverted;
 
 	    /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
 	     * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
 	     * most significant bit (with weight 2**N) implicit.
 	     *
 	     * Special case for DIVISOR_LIMB == 100...000.
 	     */
 	    if( !(divisor_limb << 1) )
 		divisor_limb_inverted = ~(mpi_limb_t)0;
 	    else
 		udiv_qrnnd(divisor_limb_inverted, dummy,
 			    -divisor_limb, 0, divisor_limb);
 
 	    i = dividend_size - 1;
 	    r = dividend_ptr[i];
 
 	    /* If the top limb is already smaller than the divisor it is
 	     * used directly as the initial remainder and skipped in the
 	     * loop; otherwise start from a zero remainder and divide the
 	     * top limb as well.  */
 	    if( r >= divisor_limb )
 		r = 0;
 	    else
 		i--;
 
 	    for( ; i >= 0; i--) {
 		n0 = dividend_ptr[i];
 		UDIV_QRNND_PREINV(dummy, r, r,
 				  n0, divisor_limb, divisor_limb_inverted);
 	    }
 	    return r;
 	}
     }
     else {
 	if( UDIV_NEEDS_NORMALIZATION ) {
 	    int normalization_steps;
 
 	    count_leading_zeros(normalization_steps, divisor_limb);
 	    if( normalization_steps ) {
 		divisor_limb <<= normalization_steps;
 
 		/* The bits shifted out of the top limb by the
 		 * normalization form the initial remainder.  */
 		n1 = dividend_ptr[dividend_size - 1];
 		r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
 
 		/* Possible optimization:
 		 * if (r == 0
 		 * && divisor_limb > ((n1 << normalization_steps)
 		 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
 		 * ...one division less...
 		 */
 		for(i = dividend_size - 2; i >= 0; i--) {
 		    n0 = dividend_ptr[i];
 		    udiv_qrnnd (dummy, r, r,
 				((n1 << normalization_steps)
 			 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
 			 divisor_limb);
 		    n1 = n0;
 		}
 		udiv_qrnnd (dummy, r, r,
 			    n1 << normalization_steps,
 			    divisor_limb);
 		/* Undo the normalization shift on the remainder.  */
 		return r >> normalization_steps;
 	    }
 	}
 	/* No normalization needed, either because udiv_qrnnd doesn't require
 	 * it, or because DIVISOR_LIMB is already normalized.  */
 	i = dividend_size - 1;
 	r = dividend_ptr[i];
 
 	/* Same shortcut as above: a top limb smaller than the divisor is
 	 * taken as the initial remainder without a division step.  */
 	if(r >= divisor_limb)
 	    r = 0;
 	else
 	    i--;
 
 	for(; i >= 0; i--) {
 	    n0 = dividend_ptr[i];
 	    udiv_qrnnd (dummy, r, r, n0, divisor_limb);
 	}
 	return r;
     }
 }
 
 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
  * the NSIZE-DSIZE least significant quotient limbs at QP
  * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is
  * non-zero, generate that many fraction bits and append them after the
  * other quotient limbs.
  * Return the most significant limb of the quotient, this is always 0 or 1.
  *
  * Preconditions:
  * 0. NSIZE >= DSIZE.
  * 1. The most significant bit of the divisor must be set.
  * 2. QP must either not overlap with the input operands at all, or
  *    QP + DSIZE >= NP must hold true.  (This means that it's
  *    possible to put the quotient in the high part of NUM, right after the
  *    remainder in NUM.)
  * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
  */
 
 /* Long division of NP/NSIZE by DP/DSIZE.  The switch selects a
  * dedicated implementation for one-limb and two-limb divisors and a
  * general one for everything larger; DSIZE == 0 is reported through
  * _gcry_divide_by_zero().  In every case the quotient limbs are
  * written to QP, the remainder is left in the low limbs of NP and the
  * return value is the extra, most significant quotient limb.
  */
 mpi_limb_t
 _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
                       mpi_ptr_t np, mpi_size_t nsize,
                       mpi_ptr_t dp, mpi_size_t dsize)
 {
     mpi_limb_t most_significant_q_limb = 0;
 
     switch(dsize) {
       case 0:
 	_gcry_divide_by_zero();
 	break;
 
       case 1:
 	{
 	    mpi_size_t i;
 	    mpi_limb_t n1;
 	    mpi_limb_t d;
 
 	    d = dp[0];
 	    n1 = np[nsize - 1];
 
 	    /* Reduce the top limb below the divisor first; the single
 	     * subtraction this takes is the returned quotient limb.  */
 	    if( n1 >= d ) {
 		n1 -= d;
 		most_significant_q_limb = 1;
 	    }
 
 	    /* The integer quotient limbs are stored above the
 	     * QEXTRA_LIMBS fraction limbs.  */
 	    qp += qextra_limbs;
 	    for( i = nsize - 2; i >= 0; i--)
 		udiv_qrnnd( qp[i], n1, n1, np[i], d );
 	    qp -= qextra_limbs;
 
 	    /* Fraction limbs: keep dividing with zero limbs fed in.  */
 	    for( i = qextra_limbs - 1; i >= 0; i-- )
 		udiv_qrnnd (qp[i], n1, n1, 0, d);
 
 	    /* The remainder is a single limb.  */
 	    np[0] = n1;
 	}
 	break;
 
       case 2:
 	{
 	    mpi_size_t i;
 	    mpi_limb_t n1, n0, n2;
 	    mpi_limb_t d1, d0;
 
 	    /* Work on the two most significant numerator limbs.  */
 	    np += nsize - 2;
 	    d1 = dp[1];
 	    d0 = dp[0];
 	    n1 = np[1];
 	    n0 = np[0];
 
 	    /* If the top two limbs are >= the divisor, subtract it once;
 	     * that subtraction is the returned quotient limb.  */
 	    if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
 		sub_ddmmss (n1, n0, n1, n0, d1, d0);
 		most_significant_q_limb = 1;
 	    }
 
 	    for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
 		mpi_limb_t q;
 		mpi_limb_t r;
 
 		/* While integer quotient limbs are produced, step down to
 		 * the next numerator limb; for fraction limbs feed in a
 		 * zero limb instead.  */
 		if( i >= qextra_limbs )
 		    np--;
 		else
 		    np[0] = 0;
 
 		if( n1 == d1 ) {
 		    /* Q should be either 111..111 or 111..110.  Need special
 		     * treatment of this rare case as normal division would
 		     * give overflow.  */
 		    q = ~(mpi_limb_t)0;
 
 		    r = n0 + d1;
 		    if( r < d1 ) {   /* Carry in the addition? */
 			add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
 			qp[i] = q;
 			continue;
 		    }
 		    n1 = d0 - (d0 != 0?1:0);
 		    n0 = -d0;
 		}
 		else {
 		    /* Estimate Q from the high divisor limb, then form
 		     * Q * D0 in (N1,N0) for the correction test below.  */
 		    udiv_qrnnd (q, r, n1, n0, d1);
 		    umul_ppmm (n1, n0, d0, q);
 		}
 
 		n2 = np[0];
 	      q_test:
 		if( n1 > r || (n1 == r && n0 > n2) ) {
 		    /* The estimated Q was too large.  */
 		    q--;
 		    sub_ddmmss (n1, n0, n1, n0, 0, d0);
 		    r += d1;
 		    if( r >= d1 )    /* If not carry, test Q again.  */
 			goto q_test;
 		}
 
 		qp[i] = q;
 		/* New partial remainder: (R,N2) - (N1,N0).  */
 		sub_ddmmss (n1, n0, r, n2, n1, n0);
 	    }
 	    /* Store the two-limb remainder.  */
 	    np[1] = n1;
 	    np[0] = n0;
 	}
 	break;
 
       default:
 	{
 	    mpi_size_t i;
 	    mpi_limb_t dX, d1, n0;
 
 	    /* Work on the DSIZE most significant numerator limbs.  DX and
 	     * D1 are the two most significant divisor limbs, N0 the most
 	     * significant limb of the current partial remainder.  */
 	    np += nsize - dsize;
 	    dX = dp[dsize - 1];
 	    d1 = dp[dsize - 2];
 	    n0 = np[dsize - 1];
 
 	    /* If the top DSIZE limbs are >= the divisor, subtract it once;
 	     * that subtraction is the returned quotient limb.  */
 	    if( n0 >= dX ) {
 		if(n0 > dX || _gcry_mpih_cmp(np, dp, dsize - 1) >= 0 ) {
 		    _gcry_mpih_sub_n(np, np, dp, dsize);
 		    n0 = np[dsize - 1];
 		    most_significant_q_limb = 1;
 		}
 	    }
 
 	    for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
 		mpi_limb_t q;
 		mpi_limb_t n1, n2;
 		mpi_limb_t cy_limb;
 
 		/* N2 keeps the limb that sits just above the DSIZE-limb
 		 * window after the window has been moved down by one.  */
 		if( i >= qextra_limbs ) {
 		    np--;
 		    n2 = np[dsize];
 		}
 		else {
 		    /* Fraction limbs: shift the partial remainder up by
 		     * one limb and feed in a zero limb at the bottom.  */
 		    n2 = np[dsize - 1];
 		    MPN_COPY_DECR (np + 1, np, dsize - 1);
 		    np[0] = 0;
 		}
 
 		if( n0 == dX ) {
 		    /* This might over-estimate q, but it's probably not worth
 		     * the extra code here to find out.  */
 		    q = ~(mpi_limb_t)0;
 		}
 		else {
 		    mpi_limb_t r;
 
 		    /* Estimate Q from the top divisor limb and refine the
 		     * estimate using the second divisor limb D1.  */
 		    udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
 		    umul_ppmm(n1, n0, d1, q);
 
 		    while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
 			q--;
 			r += dX;
 			if( r < dX ) /* I.e. "carry in previous addition?" */
 			    break;
 			n1 -= n0 < d1;
 			n0 -= d1;
 		    }
 		}
 
 		/* Possible optimization: We already have (q * n0) and (1 * n1)
 		 * after the calculation of q.	Taking advantage of that, we
 		 * could make this loop make two iterations less.  */
 		cy_limb = _gcry_mpih_submul_1(np, dp, dsize, q);
 
 		/* If the borrow out of the subtraction does not match the
 		 * limb above the window, Q was one too large: add the
 		 * divisor back and decrement Q.  */
 		if( n2 != cy_limb ) {
 		    _gcry_mpih_add_n(np, np, dp, dsize);
 		    q--;
 		}
 
 		qp[i] = q;
 		n0 = np[dsize - 1];
 	    }
 	}
     }
 
     return most_significant_q_limb;
 }
 
 
 /****************
  * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
  * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
  * Return the single-limb remainder.
  * There are no constraints on the value of the divisor.
  *
  * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
  */
 
 /* Divide the DIVIDEND_SIZE limbs at DIVIDEND_PTR by the single limb
  * DIVISOR_LIMB, store DIVIDEND_SIZE quotient limbs at QUOT_PTR and
  * return the remainder.  Returns 0 without storing anything if
  * DIVIDEND_SIZE is 0.
  *
  * Two strategies are selected by the UDIV_TIME/UMUL_TIME cost macros:
  * division via a pre-computed inverse (UDIV_QRNND_PREINV) or plain
  * udiv_qrnnd.  Each strategy has a variant for a divisor that first
  * has to be shifted left ("normalized") and one for a divisor that is
  * used as is.
  */
 mpi_limb_t
 _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
                         mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
                         mpi_limb_t divisor_limb)
 {
     mpi_size_t i;
     mpi_limb_t n1, n0, r;
     mpi_limb_t dummy GCC_ATTR_UNUSED;  /* Sink for the unused remainder of
                                           the inverse computation.  */
 
     if( !dividend_size )
 	return 0;
 
     /* If multiplication is much faster than division, and the
      * dividend is large, pre-invert the divisor, and use
      * only multiplications in the inner loop.
      *
      * This test should be read:
      * Does it ever help to use udiv_qrnnd_preinv?
      * && Does what we save compensate for the inversion overhead?
      */
     if( UDIV_TIME > (2 * UMUL_TIME + 6)
 	&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
 	int normalization_steps;
 
 	count_leading_zeros( normalization_steps, divisor_limb );
 	if( normalization_steps ) {
 	    mpi_limb_t divisor_limb_inverted;
 
 	    divisor_limb <<= normalization_steps;
 
 	    /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
 	     * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
 	     * most significant bit (with weight 2**N) implicit.
 	     */
 	    /* Special case for DIVISOR_LIMB == 100...000.  */
 	    if( !(divisor_limb << 1) )
 		divisor_limb_inverted = ~(mpi_limb_t)0;
 	    else
 		udiv_qrnnd(divisor_limb_inverted, dummy,
 			   -divisor_limb, 0, divisor_limb);
 
 	    /* The bits shifted out of the top limb by the normalization
 	     * form the initial remainder.  */
 	    n1 = dividend_ptr[dividend_size - 1];
 	    r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
 
 	    /* Possible optimization:
 	     * if (r == 0
 	     * && divisor_limb > ((n1 << normalization_steps)
 	     *		       | (dividend_ptr[dividend_size - 2] >> ...)))
 	     * ...one division less...
 	     */
 	    /* Step I reads dividend limb I but produces quotient limb
 	     * I + 1; the quotient limb is stored only after the dividend
 	     * limb at that index has been loaded.  */
 	    for( i = dividend_size - 2; i >= 0; i--) {
 		n0 = dividend_ptr[i];
 		UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
 				   ((n1 << normalization_steps)
 			 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
 			      divisor_limb, divisor_limb_inverted);
 		n1 = n0;
 	    }
 	    /* Final step: the lowest limb has only zero bits shifted in.  */
 	    UDIV_QRNND_PREINV( quot_ptr[0], r, r,
 			       n1 << normalization_steps,
 			       divisor_limb, divisor_limb_inverted);
 	    /* Undo the normalization shift on the remainder.  */
 	    return r >> normalization_steps;
 	}
 	else {
 	    mpi_limb_t divisor_limb_inverted;
 
 	    /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
 	     * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
 	     * most significant bit (with weight 2**N) implicit.
 	     */
 	    /* Special case for DIVISOR_LIMB == 100...000.  */
 	    if( !(divisor_limb << 1) )
 		divisor_limb_inverted = ~(mpi_limb_t) 0;
 	    else
 		udiv_qrnnd(divisor_limb_inverted, dummy,
 			   -divisor_limb, 0, divisor_limb);
 
 	    i = dividend_size - 1;
 	    r = dividend_ptr[i];
 
 	    /* A top limb smaller than the divisor yields a zero quotient
 	     * limb and becomes the initial remainder; otherwise start
 	     * from a zero remainder and divide the top limb as well.  */
 	    if( r >= divisor_limb )
 		r = 0;
 	    else
 		quot_ptr[i--] = 0;
 
 	    for( ; i >= 0; i-- ) {
 		n0 = dividend_ptr[i];
 		UDIV_QRNND_PREINV( quot_ptr[i], r, r,
 				   n0, divisor_limb, divisor_limb_inverted);
 	    }
 	    return r;
 	}
     }
     else {
 	if(UDIV_NEEDS_NORMALIZATION) {
 	    int normalization_steps;
 
 	    count_leading_zeros (normalization_steps, divisor_limb);
 	    if( normalization_steps ) {
 		divisor_limb <<= normalization_steps;
 
 		/* The bits shifted out of the top limb by the
 		 * normalization form the initial remainder.  */
 		n1 = dividend_ptr[dividend_size - 1];
 		r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
 
 		/* Possible optimization:
 		 * if (r == 0
 		 * && divisor_limb > ((n1 << normalization_steps)
 		 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
 		 * ...one division less...
 		 */
 		for( i = dividend_size - 2; i >= 0; i--) {
 		    n0 = dividend_ptr[i];
 		    udiv_qrnnd (quot_ptr[i + 1], r, r,
 			     ((n1 << normalization_steps)
 			 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
 				divisor_limb);
 		    n1 = n0;
 		}
 		udiv_qrnnd (quot_ptr[0], r, r,
 			    n1 << normalization_steps,
 			    divisor_limb);
 		/* Undo the normalization shift on the remainder.  */
 		return r >> normalization_steps;
 	    }
 	}
 	/* No normalization needed, either because udiv_qrnnd doesn't require
 	 * it, or because DIVISOR_LIMB is already normalized.  */
 	i = dividend_size - 1;
 	r = dividend_ptr[i];
 
 	/* Same shortcut as above: a top limb smaller than the divisor
 	 * gives a zero quotient limb and is the initial remainder.  */
 	if(r >= divisor_limb)
 	    r = 0;
 	else
 	    quot_ptr[i--] = 0;
 
 	for(; i >= 0; i--) {
 	    n0 = dividend_ptr[i];
 	    udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
 	}
 	return r;
     }
 }
diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c
index aa454cfe..6c51533f 100644
--- a/mpi/mpih-mul.c
+++ b/mpi/mpih-mul.c
@@ -1,529 +1,529 @@
 /* mpih-mul.c  -  MPI helper functions
  * Copyright (C) 1994, 1996, 1998, 1999, 2000,
  *               2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "mpi-internal.h"
 #include "longlong.h"
 #include "g10lib.h"
 
 /* Multiply two SIZE-limb operands into PRODP: operands shorter than
  * KARATSUBA_THRESHOLD limbs go to mul_n_basecase, everything else to
  * mul_n with TSPACE as scratch space.  SIZE is evaluated once; the
  * other arguments are passed through to exactly one of the calls.  */
 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
     do {						\
 	if( (size) < KARATSUBA_THRESHOLD )		\
 	    mul_n_basecase (prodp, up, vp, size);	\
 	else						\
 	    mul_n (prodp, up, vp, size, tspace);	\
     } while (0)
 
 /* Squaring counterpart of MPN_MUL_N_RECURSE: dispatches between
  * _gcry_mpih_sqr_n_basecase and _gcry_mpih_sqr_n on the same
  * KARATSUBA_THRESHOLD test.  */
 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
     do {					    \
 	if ((size) < KARATSUBA_THRESHOLD)	    \
 	    _gcry_mpih_sqr_n_basecase (prodp, up, size);	 \
 	else					    \
 	    _gcry_mpih_sqr_n (prodp, up, size, tspace);	 \
     } while (0)
 
 
 
 
 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  * always stored.  Return the most significant limb.
  *
  * Argument constraints:
  * 1. PRODP != UP and PRODP != VP, i.e. the destination
  *    must be distinct from the multiplier and the multiplicand.
  *
  *
  * Handle simple cases with traditional multiplication.
  *
  * This is the most critical code of multiplication.  All multiplies rely
  * on this, both small and huge.  Small ones arrive here immediately.  Huge
  * ones arrive here as this is the base case for Karatsuba's recursive
  * algorithm below.
  */
 
 static mpi_limb_t
 mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
 				 mpi_ptr_t vp, mpi_size_t size)
 {
     mpi_size_t row;
     mpi_limb_t carry;
     mpi_limb_t multiplier = vp[0];
 
     /* Row 0 initialises the product instead of accumulating into it,
      * which makes a separate zeroing pass unnecessary.  Multipliers of
      * 0 and 1 are served by a clear and a copy respectively.  */
     if( multiplier == 0 ) {
 	MPN_ZERO( prodp, size );
 	carry = 0;
     }
     else if( multiplier == 1 ) {
 	MPN_COPY( prodp, up, size );
 	carry = 0;
     }
     else {
 	carry = _gcry_mpih_mul_1( prodp, up, size, multiplier );
     }
     prodp[size] = carry;
 
     /* Every further limb of V adds U times that limb into the product,
      * one limb position higher than the row before.  The carry out of
      * each row becomes the new top limb of the partial product.  */
     for( row = 1; row < size; row++ ) {
 	mpi_ptr_t acc = prodp + row;
 
 	multiplier = vp[row];
 	if( multiplier == 0 )
 	    carry = 0;
 	else if( multiplier == 1 )
 	    carry = _gcry_mpih_add_n( acc, acc, up, size );
 	else
 	    carry = _gcry_mpih_addmul_1( acc, up, size, multiplier );
 
 	acc[size] = carry;
     }
 
     /* The carry of the last row is the most significant product limb.  */
     return carry;
 }
 
 
 /* Multiply the SIZE-limb numbers at UP and VP and store the 2 * SIZE
  * limb product at PRODP, using TSPACE as scratch space.  An odd SIZE
  * is reduced to the even SIZE - 1 plus two single-limb
  * multiply-accumulate passes; an even SIZE is split in halves and
  * handled with three half-size products that recurse through
  * MPN_MUL_N_RECURSE.
  * NOTE(review): the required TSPACE length is not stated here; the
  * caller _gcry_mpih_mul_n allocates 2 * SIZE limbs for it.
  */
 static void
 mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
 			mpi_size_t size, mpi_ptr_t tspace )
 {
     if( size & 1 ) {
       /* The size is odd, and the code below doesn't handle that.
        * Multiply the least significant (size - 1) limbs with a recursive
        * call, and handle the most significant limb of S1 and S2
        * separately.
        * A slightly faster way to do this would be to make the Karatsuba
        * code below behave as if the size were even, and let it check for
        * odd size in the end.  I.e., in essence move this code to the end.
        * Doing so would save us a recursive call, and potentially make the
        * stack grow a lot less.
        */
       mpi_size_t esize = size - 1;	 /* even size */
       mpi_limb_t cy_limb;
 
       MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
       /* Add U[0..esize-1] * V[esize], shifted up by ESIZE limbs.  */
       cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, vp[esize] );
       prodp[esize + esize] = cy_limb;
       /* Add V[0..size-1] * U[esize], shifted up by ESIZE limbs; this
        * pass also covers the product of the two top limbs.  */
       cy_limb = _gcry_mpih_addmul_1( prodp + esize, vp, size, up[esize] );
       prodp[esize + size] = cy_limb;
     }
     else {
 	/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 	 *
 	 * Split U in two pieces, U1 and U0, such that
 	 * U = U0 + U1*(B**n),
 	 * and V in V1 and V0, such that
 	 * V = V0 + V1*(B**n).
 	 *
 	 * UV is then computed recursively using the identity
 	 *
 	 *	  2n   n	  n			n
 	 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 	 *		  1 1	     1	0   0  1	      0 0
 	 *
 	 * Where B = 2**BITS_PER_MPI_LIMB.
 	 */
 	mpi_size_t hsize = size >> 1;
 	mpi_limb_t cy;
 	int negflg;   /* Set when the middle product M is negative.  */
 
 	/* Product H.	   ________________  ________________
 	 *		  |_____U1 x V1____||____U0 x V0_____|
 	 * Put result in upper part of PROD and pass low part of TSPACE
 	 * as new TSPACE.
 	 */
 	MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);
 
 	/* Product M.	   ________________
 	 *		  |_(U1-U0)(V0-V1)_|
 	 */
 	/* Store |U1 - U0| in the low half of the low part of PROD and
 	 * |V0 - V1| in its high half; NEGFLG tracks the sign of their
 	 * product.  */
 	if( _gcry_mpih_cmp(up + hsize, up, hsize) >= 0 ) {
 	    _gcry_mpih_sub_n(prodp, up + hsize, up, hsize);
 	    negflg = 0;
 	}
 	else {
 	    _gcry_mpih_sub_n(prodp, up, up + hsize, hsize);
 	    negflg = 1;
 	}
 	if( _gcry_mpih_cmp(vp + hsize, vp, hsize) >= 0 ) {
 	    _gcry_mpih_sub_n(prodp + hsize, vp + hsize, vp, hsize);
 	    negflg ^= 1;
 	}
 	else {
 	    _gcry_mpih_sub_n(prodp + hsize, vp, vp + hsize, hsize);
 	    /* No change of NEGFLG.  */
 	}
 	/* Read temporary operands from low part of PROD.
 	 * Put result in low part of TSPACE using upper part of TSPACE
 	 * as new TSPACE.
 	 */
 	MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);
 
 	/* Add/copy product H. */
 	MPN_COPY (prodp + hsize, prodp + size, hsize);
 	cy = _gcry_mpih_add_n( prodp + size, prodp + size,
 			    prodp + size + hsize, hsize);
 
 	/* Add product M (if NEGFLG M is a negative number) */
 	if(negflg)
 	    cy -= _gcry_mpih_sub_n(prodp + hsize, prodp + hsize, tspace, size);
 	else
 	    cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size);
 
 	/* Product L.	   ________________  ________________
 	 *		  |________________||____U0 x V0_____|
 	 * Read temporary operands from low part of PROD.
 	 * Put result in low part of TSPACE using upper part of TSPACE
 	 * as new TSPACE.
 	 */
 	MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
 
 	/* Add/copy Product L (twice) */
 
 	cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size);
 	/* Propagate the accumulated carry into the top HSIZE limbs.  */
 	if( cy )
 	  _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);
 
 	MPN_COPY(prodp, tspace, hsize);
 	cy = _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 	if( cy )
 	    _gcry_mpih_add_1(prodp + size, prodp + size, size, 1);
     }
 }
 
 
 void
 _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
 {
     mpi_size_t row;
     mpi_limb_t carry;
     mpi_limb_t factor = up[0];
 
     /* Row 0 initialises the product instead of accumulating into it,
      * which makes a separate zeroing pass unnecessary.  Factors of 0
      * and 1 are served by a clear and a copy respectively.  */
     if( factor == 0 ) {
 	MPN_ZERO( prodp, size );
 	carry = 0;
     }
     else if( factor == 1 ) {
 	MPN_COPY( prodp, up, size );
 	carry = 0;
     }
     else {
 	carry = _gcry_mpih_mul_1( prodp, up, size, factor );
     }
     prodp[size] = carry;
 
     /* Every further limb of U adds U times that limb into the product,
      * one limb position higher than the row before.  The carry out of
      * each row becomes the new top limb of the partial product.  */
     for( row = 1; row < size; row++ ) {
 	mpi_ptr_t acc = prodp + row;
 
 	factor = up[row];
 	if( factor == 0 )
 	    carry = 0;
 	else if( factor == 1 )
 	    carry = _gcry_mpih_add_n( acc, acc, up, size );
 	else
 	    carry = _gcry_mpih_addmul_1( acc, up, size, factor );
 
 	acc[size] = carry;
     }
 }
 
 
 /* Square the SIZE-limb number at UP and store the 2 * SIZE limb
  * result at PRODP, using TSPACE as scratch space.  An odd SIZE is
  * reduced to the even SIZE - 1 plus two single-limb
  * multiply-accumulate passes; an even SIZE is split in halves and
  * handled with three half-size squarings that recurse through
  * MPN_SQR_N_RECURSE.
  * NOTE(review): the required TSPACE length is not stated here; the
  * caller _gcry_mpih_mul_n allocates 2 * SIZE limbs for it.
  */
 void
 _gcry_mpih_sqr_n( mpi_ptr_t prodp,
                   mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
 {
     if( size & 1 ) {
 	/* The size is odd, and the code below doesn't handle that.
 	 * Multiply the least significant (size - 1) limbs with a recursive
 	 * call, and handle the most significant limb of S1 and S2
 	 * separately.
 	 * A slightly faster way to do this would be to make the Karatsuba
 	 * code below behave as if the size were even, and let it check for
 	 * odd size in the end.  I.e., in essence move this code to the end.
 	 * Doing so would save us a recursive call, and potentially make the
 	 * stack grow a lot less.
 	 */
 	mpi_size_t esize = size - 1;	   /* even size */
 	mpi_limb_t cy_limb;
 
 	MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
 	/* Add U[0..esize-1] * U[esize], shifted up by ESIZE limbs.  */
 	cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, up[esize] );
 	prodp[esize + esize] = cy_limb;
 	/* Add U[0..size-1] * U[esize], shifted up by ESIZE limbs; this
 	 * pass also covers the square of the top limb.  */
 	cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, size, up[esize] );
 
 	prodp[esize + size] = cy_limb;
     }
     else {
 	mpi_size_t hsize = size >> 1;
 	mpi_limb_t cy;
 
 	/* Product H.	   ________________  ________________
 	 *		  |_____U1 x U1____||____U0 x U0_____|
 	 * Put result in upper part of PROD and pass low part of TSPACE
 	 * as new TSPACE.
 	 */
 	MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
 
 	/* Product M.	   ________________
 	 *		  |_(U1-U0)(U0-U1)_|
 	 */
 	/* Only |U1 - U0| is needed because it is squared below.  */
 	if( _gcry_mpih_cmp( up + hsize, up, hsize) >= 0 )
 	    _gcry_mpih_sub_n( prodp, up + hsize, up, hsize);
 	else
 	    _gcry_mpih_sub_n (prodp, up, up + hsize, hsize);
 
 	/* Read temporary operands from low part of PROD.
 	 * Put result in low part of TSPACE using upper part of TSPACE
 	 * as new TSPACE.  */
 	MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
 
 	/* Add/copy product H  */
 	MPN_COPY(prodp + hsize, prodp + size, hsize);
 	cy = _gcry_mpih_add_n(prodp + size, prodp + size,
 			   prodp + size + hsize, hsize);
 
 	/* Add product M.  M = (U1-U0)(U0-U1) is never positive, so the
 	 * square held in TSPACE is always subtracted and no sign flag
 	 * is needed.  */
 	cy -= _gcry_mpih_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 
 	/* Product L.	   ________________  ________________
 	 *		  |________________||____U0 x U0_____|
 	 * Read temporary operands from low part of PROD.
 	 * Put result in low part of TSPACE using upper part of TSPACE
 	 * as new TSPACE.  */
 	MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
 
 	/* Add/copy Product L (twice).	*/
 	cy += _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace, size);
 	/* Propagate the accumulated carry into the top HSIZE limbs.  */
 	if( cy )
 	    _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size,
 							    hsize, cy);
 
 	MPN_COPY(prodp, tspace, hsize);
 	cy = _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 	if( cy )
 	    _gcry_mpih_add_1 (prodp + size, prodp + size, size, 1);
     }
 }
 
 
 /* This should be made into an inline function in gmp.h.  */
 void
 _gcry_mpih_mul_n( mpi_ptr_t prodp,
                      mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
 {
     const int squaring = (up == vp);
     mpi_ptr_t scratch;
     int secure;
 
     /* Operands below the threshold need no scratch space at all.  */
     if( size < KARATSUBA_THRESHOLD ) {
 	if( squaring )
 	    _gcry_mpih_sqr_n_basecase( prodp, up, size );
 	else
 	    mul_n_basecase( prodp, up, vp, size );
 	return;
     }
 
     /* Large operands: allocate 2 * SIZE limbs of scratch space.  The
      * scratch space is requested as secure when an input operand is
      * reported secure by _gcry_is_secure.  */
     if( squaring )
 	secure = _gcry_is_secure( up );
     else
 	secure = _gcry_is_secure( up ) || _gcry_is_secure( vp );
 
     scratch = mpi_alloc_limb_space( 2 * size, secure );
     if( squaring )
 	_gcry_mpih_sqr_n( prodp, up, size, scratch );
     else
 	mul_n( prodp, up, vp, size, scratch );
     _gcry_mpi_free_limb_space( scratch, 2 * size );
 }
 
 
 
 /* Multiply U (UP, USIZE limbs) by V (VP, VSIZE limbs) into PRODP by
  * cutting U into pieces of VSIZE limbs, multiplying each piece with V
  * through MPN_MUL_N_RECURSE and accumulating the partial products.
  * A final piece of U shorter than VSIZE limbs is multiplied with
  * _gcry_mpih_mul when it is below KARATSUBA_THRESHOLD and otherwise by
  * recursing with the chained context CTX->next.
  *
  * CTX caches the scratch buffers TSPACE and TP between calls; they are
  * (re)allocated here whenever they are missing or were sized for a
  * smaller VSIZE.  Nothing is freed on return; the buffers are released
  * by _gcry_mpih_release_karatsuba_ctx.
  * NOTE(review): the first product reads VSIZE limbs from UP, so this
  * presumably requires USIZE >= VSIZE - confirm against callers.
  */
 void
 _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp,
                                   mpi_ptr_t up, mpi_size_t usize,
                                   mpi_ptr_t vp, mpi_size_t vsize,
                                   struct karatsuba_ctx *ctx )
 {
     mpi_limb_t cy;
 
     /* Make sure the scratch space holds 2 * VSIZE limbs.  TSPACE_SIZE
      * records the VSIZE the buffer was made for, TSPACE_NLIMBS the
      * allocated length that has to be passed when freeing it.  */
     if( !ctx->tspace || ctx->tspace_size < vsize ) {
 	if( ctx->tspace )
 	    _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs );
         ctx->tspace_nlimbs = 2 * vsize;
 	ctx->tspace = mpi_alloc_limb_space (2 * vsize,
 				            (_gcry_is_secure (up)
                                              || _gcry_is_secure (vp)));
 	ctx->tspace_size = vsize;
     }
 
     /* First piece: its product is stored directly, not accumulated.  */
     MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
 
     prodp += vsize;
     up += vsize;
     usize -= vsize;
     if( usize >= vsize ) {
 	/* Further full pieces need TP to hold each partial product
 	 * before it is added into PROD.  */
 	if( !ctx->tp || ctx->tp_size < vsize ) {
 	    if( ctx->tp )
 		_gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs );
             ctx->tp_nlimbs = 2 * vsize;
 	    ctx->tp = mpi_alloc_limb_space (2 * vsize,
                                             (_gcry_is_secure (up)
                                              || _gcry_is_secure (vp)));
 	    ctx->tp_size = vsize;
 	}
 
 	do {
 	    MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
 	    /* The low half overlaps limbs already present in PROD and is
 	     * added; the high half plus the carry is stored above it.  */
 	    cy = _gcry_mpih_add_n( prodp, prodp, ctx->tp, vsize );
 	    _gcry_mpih_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
 	    prodp += vsize;
 	    up += vsize;
 	    usize -= vsize;
 	} while( usize >= vsize );
     }
 
     /* Remaining piece of U, shorter than V.  The operands are passed
      * with V first so that the longer operand comes first.  */
     if( usize ) {
 	if( usize < KARATSUBA_THRESHOLD ) {
 	    _gcry_mpih_mul( ctx->tspace, vp, vsize, up, usize );
 	}
 	else {
 	    /* The recursion needs scratch buffers of its own; they live
 	     * in a context chained through CTX->next, created on first
 	     * use.  */
 	    if( !ctx->next ) {
 		ctx->next = xcalloc( 1, sizeof *ctx );
 	    }
 	    _gcry_mpih_mul_karatsuba_case( ctx->tspace,
 					vp, vsize,
 					up, usize,
 					ctx->next );
 	}
 
 	cy = _gcry_mpih_add_n( prodp, prodp, ctx->tspace, vsize);
 	_gcry_mpih_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
     }
 }
 
 
 void
 _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
 {
     struct karatsuba_ctx *node;
     struct karatsuba_ctx *following;
 
     /* Release the buffers of the head context.  The head structure
      * itself is not freed here.  */
     if( ctx->tp )
 	_gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs );
     if( ctx->tspace )
 	_gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs );
 
     /* Every chained context is freed together with its buffers.  The
      * successor is fetched before the node is released.  */
     node = ctx->next;
     while( node ) {
 	following = node->next;
 
 	if( node->tp )
 	    _gcry_mpi_free_limb_space( node->tp, node->tp_nlimbs );
 	if( node->tspace )
 	    _gcry_mpi_free_limb_space( node->tspace, node->tspace_nlimbs );
 	xfree( node );
 
 	node = following;
     }
 }
 
 /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
  * and v (pointed to by VP, with VSIZE limbs), and store the result at
  * PRODP.  USIZE + VSIZE limbs are stored, whether or not the most
  * significant limb of the result turns out to be zero (if VSIZE is
  * zero, nothing is stored and 0 is returned).  Return the most
  * significant limb of the result.
  *
  * NOTE: The space pointed to by PRODP is overwritten before finished
  * with U and V, so overlap is an error.
  *
  * Argument constraints:
  * 1. USIZE >= VSIZE.
  * 2. PRODP != UP and PRODP != VP, i.e. the destination
  *    must be distinct from the multiplier and the multiplicand.
  */
 
 mpi_limb_t
 _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
                    mpi_ptr_t vp, mpi_size_t vsize)
 {
     mpi_size_t row;
     mpi_limb_t carry;
     mpi_limb_t multiplier;
 
     /* Long second operand: hand over to the Karatsuba driver with a
      * temporary context and read the top limb back from the result.  */
     if( vsize >= KARATSUBA_THRESHOLD ) {
 	mpi_ptr_t top_limb = prodp + usize + vsize - 1;
 	struct karatsuba_ctx ctx;
 
 	memset( &ctx, 0, sizeof ctx );
 	_gcry_mpih_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
 	_gcry_mpih_release_karatsuba_ctx( &ctx );
 	return *top_limb;
     }
 
     /* An empty V: nothing is stored.  */
     if( !vsize )
 	return 0;
 
     /* Schoolbook multiplication.  Row 0 initialises the product instead
      * of accumulating into it, which makes a separate zeroing pass
      * unnecessary.  Multipliers of 0 and 1 are served by a clear and a
      * copy respectively.  */
     multiplier = vp[0];
     if( multiplier == 0 ) {
 	MPN_ZERO( prodp, usize );
 	carry = 0;
     }
     else if( multiplier == 1 ) {
 	MPN_COPY( prodp, up, usize );
 	carry = 0;
     }
     else {
 	carry = _gcry_mpih_mul_1( prodp, up, usize, multiplier );
     }
     prodp[usize] = carry;
 
     /* Every further limb of V adds U times that limb into the product,
      * one limb position higher than the row before.  */
     for( row = 1; row < vsize; row++ ) {
 	mpi_ptr_t acc = prodp + row;
 
 	multiplier = vp[row];
 	if( multiplier == 0 )
 	    carry = 0;
 	else if( multiplier == 1 )
 	    carry = _gcry_mpih_add_n( acc, acc, up, usize );
 	else
 	    carry = _gcry_mpih_addmul_1( acc, up, usize, multiplier );
 
 	acc[usize] = carry;
     }
 
     /* The carry of the last row is the most significant product limb.  */
     return carry;
 }
diff --git a/mpi/pa7100/mpih-lshift.S b/mpi/pa7100/mpih-lshift.S
index 8ade1964..9bf6f728 100644
--- a/mpi/pa7100/mpih-lshift.S
+++ b/mpi/pa7100/mpih-lshift.S
@@ -1,96 +1,96 @@
 /* hppa   lshift
  *	  optimized for the PA7100, where it runs at 3.25 cycles/limb
  * 
  *      Copyright (C) 1992, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(gr26)
  *		   mpi_ptr_t up,	(gr25)
  *		   mpi_size_t usize,	(gr24)
  *		   unsigned cnt)	(gr23)
  *
  * Shift the USIZE-limb vector at UP left by CNT bits and store the
  * result at WP.  Both pointers are first moved just past the top limb
  * (pointer + 4 * usize) and the limbs are then processed from the most
  * significant one downwards, using pre-decrementing loads and stores.
  * The shift amount register is set to 32 - CNT, so a vshd on two
  * adjacent limbs (higher limb first) yields one result limb.  The bits
  * shifted out of the top limb are computed into %r28 (the "carry out
  * limb").
  *
  * The instruction following each branch (addib, bv) sits in that
  * branch's delay slot, which is why a vshd or stw trails every branch.
  *
  * NOTE(review): the code assumes 4-byte limbs, USIZE >= 1 (the first
  * load is unconditional) and presumably 0 < CNT < 32 -- confirm
  * against the callers.
  */
 
 	.code
 	.export 	_gcry_mpih_lshift
 	.label		_gcry_mpih_lshift
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	/* up += 4*usize, wp += 4*usize; then fetch the top source limb. */
 	sh2add		%r24,%r25,%r25
 	sh2add		%r24,%r26,%r26
 	ldws,mb 	-4(0,%r25),%r22
 	/* Shift amount register = 32 - cnt for all vshd instructions. */
 	subi		32,%r23,%r1
 	mtsar		%r1
 	/* A single limb goes straight to the final store at L$0004. */
 	addib,= 	-1,%r24,L$0004
 	vshd		%r0,%r22,%r28		; compute carry out limb
 	ldws,mb 	-4(0,%r25),%r29
 	addib,<=	-5,%r24,L$rest
 	vshd		%r22,%r29,%r20
 
 	/* Main loop, unrolled four times: each step loads the next lower
 	 * limb, stores the pending result limb from %r20 and combines the
 	 * two most recent limbs (alternating %r22 and %r29) into %r20. */
 	.label	L$loop
 	ldws,mb 	-4(0,%r25),%r22
 	stws,mb 	%r20,-4(0,%r26)
 	vshd		%r29,%r22,%r20
 	ldws,mb 	-4(0,%r25),%r29
 	stws,mb 	%r20,-4(0,%r26)
 	vshd		%r22,%r29,%r20
 	ldws,mb 	-4(0,%r25),%r22
 	stws,mb 	%r20,-4(0,%r26)
 	vshd		%r29,%r22,%r20
 	ldws,mb 	-4(0,%r25),%r29
 	stws,mb 	%r20,-4(0,%r26)
 	addib,> 	-4,%r24,L$loop
 	vshd		%r22,%r29,%r20
 
 	/* Left-over limbs: undo the counter bias, then handle one limb per
 	 * step.  Which exit is taken depends on whether the last limb
 	 * loaded is in %r29 (L$end1) or in %r22 (L$end2). */
 	.label	L$rest
 	addib,= 	4,%r24,L$end1
 	nop
 	.label	L$eloop
 	ldws,mb 	-4(0,%r25),%r22
 	stws,mb 	%r20,-4(0,%r26)
 	addib,<=	-1,%r24,L$end2
 	vshd		%r29,%r22,%r20
 	ldws,mb 	-4(0,%r25),%r29
 	stws,mb 	%r20,-4(0,%r26)
 	addib,> 	-1,%r24,L$eloop
 	vshd		%r22,%r29,%r20
 
 	/* Tails: store the pending limb, then form the least significant
 	 * result limb by shifting in zero bits (%r0) and store it in the
 	 * delay slot of the return branch. */
 	.label	L$end1
 	stws,mb 	%r20,-4(0,%r26)
 	vshd		%r29,%r0,%r20
 	bv		0(%r2)
 	stw		%r20,-4(0,%r26)
 	.label	L$end2
 	stws,mb 	%r20,-4(0,%r26)
 	.label	L$0004
 	vshd		%r22,%r0,%r20
 	bv		0(%r2)
 	stw		%r20,-4(0,%r26)
 
 	.exit
 	.procend
 
 
 
diff --git a/mpi/pa7100/mpih-rshift.S b/mpi/pa7100/mpih-rshift.S
index 06242027..d0ceb8b1 100644
--- a/mpi/pa7100/mpih-rshift.S
+++ b/mpi/pa7100/mpih-rshift.S
@@ -1,92 +1,92 @@
 /* hppa   rshift
  *	  optimized for the PA7100, where it runs at 3.25 cycles/limb
  *
  *      Copyright (C) 1992, 1994, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,       (gr26)
  *		   mpi_ptr_t up,       (gr25)
  *		   mpi_size_t usize,   (gr24)
  *		   unsigned cnt)       (gr23)
  *
  * Shift the USIZE-limb vector at UP right by CNT bits and store the
  * result at WP.  The limbs are processed from the least significant
  * one upwards, using post-incrementing loads and stores.  The shift
  * amount register is set to CNT, so a vshd on two adjacent limbs
  * (higher limb first) yields one result limb.  The bits shifted out
  * of the lowest limb are computed into %r28 (the "carry out limb").
  *
  * The instruction following each branch (addib, bv) sits in that
  * branch's delay slot, which is why a vshd or stw trails every branch.
  *
  * NOTE(review): the code assumes 4-byte limbs, USIZE >= 1 (the first
  * load is unconditional) and presumably 0 < CNT < 32 -- confirm
  * against the callers.
  */
 
 	.code
 	.export 	_gcry_mpih_rshift
 	.label		_gcry_mpih_rshift
 	.proc
 	.callinfo	frame=64,no_calls
 	.entry
 
 	/* Fetch the lowest source limb and set the shift amount to cnt. */
 	ldws,ma 	4(0,%r25),%r22
 	mtsar		%r23
 	/* A single limb goes straight to the final store at L$r004. */
 	addib,= 	-1,%r24,L$r004
 	vshd		%r22,%r0,%r28		; compute carry out limb
 	ldws,ma 	4(0,%r25),%r29
 	addib,<=	-5,%r24,L$rrest
 	vshd		%r29,%r22,%r20
 
 	/* Main loop, unrolled four times: each step loads the next higher
 	 * limb, stores the pending result limb from %r20 and combines the
 	 * two most recent limbs (alternating %r22 and %r29) into %r20. */
 	.label	L$roop
 	ldws,ma 	4(0,%r25),%r22
 	stws,ma 	%r20,4(0,%r26)
 	vshd		%r22,%r29,%r20
 	ldws,ma 	4(0,%r25),%r29
 	stws,ma 	%r20,4(0,%r26)
 	vshd		%r29,%r22,%r20
 	ldws,ma 	4(0,%r25),%r22
 	stws,ma 	%r20,4(0,%r26)
 	vshd		%r22,%r29,%r20
 	ldws,ma 	4(0,%r25),%r29
 	stws,ma 	%r20,4(0,%r26)
 	addib,> 	-4,%r24,L$roop
 	vshd		%r29,%r22,%r20
 
 	/* Left-over limbs: undo the counter bias, then handle one limb per
 	 * step.  Which exit is taken depends on whether the last limb
 	 * loaded is in %r29 (L$rend1) or in %r22 (L$rend2). */
 	.label	L$rrest
 	addib,= 	4,%r24,L$rend1
 	nop
 	.label	L$eroop
 	ldws,ma 	4(0,%r25),%r22
 	stws,ma 	%r20,4(0,%r26)
 	addib,<=	-1,%r24,L$rend2
 	vshd		%r22,%r29,%r20
 	ldws,ma 	4(0,%r25),%r29
 	stws,ma 	%r20,4(0,%r26)
 	addib,> 	-1,%r24,L$eroop
 	vshd		%r29,%r22,%r20
 
 	/* Tails: store the pending limb, then form the most significant
 	 * result limb by shifting in zero bits (%r0) and store it in the
 	 * delay slot of the return branch. */
 	.label	L$rend1
 	stws,ma		%r20,4(0,%r26)
 	vshd		%r0,%r29,%r20
 	bv		0(%r2)
 	stw		%r20,0(0,%r26)
 	.label	L$rend2
 	stws,ma		%r20,4(0,%r26)
 	.label	L$r004
 	vshd		%r0,%r22,%r20
 	bv		0(%r2)
 	stw		%r20,0(0,%r26)
 
 	.exit
 	.procend
 
 
diff --git a/mpi/power/mpih-add1.S b/mpi/power/mpih-add1.S
index 876b56c6..b992f158 100644
--- a/mpi/power/mpih-add1.S
+++ b/mpi/power/mpih-add1.S
@@ -1,87 +1,87 @@
 /* IBM POWER add_n -- Add two limb vectors of equal, non-zero length.
  *
  *      Copyright (C) 1992, 1994, 1996, 1999,
  *                    2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s1_ptr	r4
 # s2_ptr	r5
 # size		r6
 #
 # Adds the limb vectors at s1_ptr and s2_ptr (size limbs each), stores
 # the sum at res_ptr and returns the final carry in r3.
 #
 # The main loop is unrolled twice, so CTR is loaded with size / 2.  The
 # least significant limbs are always added up front (into r7).  For an
 # odd size that first sum is either the whole job (size == 1) or it is
 # stored at L1 and the next pair is added before joining the even path.
 # Each sum is stored one step after it is computed, so r7 and r11 hold
 # pending results on entry to Lend.
 #
 # NOTE(review): size is assumed to be non-zero (the first limbs are
 # loaded unconditionally), as stated in the file title.
  */
 
 	.toc
 	.extern _gcry_mpih_add_n[DS]
 	.extern ._gcry_mpih_add_n
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_add_n
 	.globl ._gcry_mpih_add_n
 	.csect _gcry_mpih_add_n[DS]
 _gcry_mpih_add_n:
 	.long ._gcry_mpih_add_n, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_add_n:
 	andil.	10,6,1		# odd or even number of limbs?
 	l	8,0(4)		# load least significant s1 limb
 	l	0,0(5)		# load least significant s2 limb
 	cal	3,-4(3) 	# offset res_ptr, it's updated before it's used
 	sri	10,6,1		# count for unrolled loop
 	a	7,0,8		# add least significant limbs, set cy
 	mtctr	10		# copy count into CTR
 	beq	0,Leven 	# branch if even # of limbs (# of limbs >= 2)
 
 # We have an odd # of limbs.  Add the first limbs separately.
 	cmpi	1,10,0		# is count for unrolled loop zero?
 	bne	1,L1		# branch if not
 	st	7,4(3)
 	aze	3,10		# use the fact that r10 is zero...
 	br			# return
 
 # We added least significant limbs.  Now reload the next limbs to enter loop.
 L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
 	lu	0,4(5)		# load s2 limb and update s2_ptr
 	stu	7,4(3)
 	ae	7,0,8		# add limbs, set cy
 Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
 	lu	10,4(5) 	# load s2 limb and update s2_ptr
 	bdz	Lend		# If done, skip loop
 
 Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
 	lu	0,4(5)		# load s2 limb and update s2_ptr
 	ae	11,9,10 	# add previous limbs with cy, set cy
 	stu	7,4(3)		#
 	lu	9,4(4)		# load s1 limb and update s1_ptr
 	lu	10,4(5) 	# load s2 limb and update s2_ptr
 	ae	7,0,8		# add previous limbs with cy, set cy
 	stu	11,4(3) 	#
 	bdn	Loop		# decrement CTR and loop back
 
 Lend:	ae	11,9,10 	# add limbs with cy, set cy
 	st	7,4(3)		#
 	st	11,8(3) 	#
 	lil	3,0		# load cy into ...
 	aze	3,3		# ... return value register
 	br
 
diff --git a/mpi/power/mpih-lshift.S b/mpi/power/mpih-lshift.S
index d9e42daf..1fcd1891 100644
--- a/mpi/power/mpih-lshift.S
+++ b/mpi/power/mpih-lshift.S
@@ -1,64 +1,64 @@
 /* IBM POWER lshift
  *
  *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s_ptr 	r4
 # size		r5
 # cnt		r6
 #
 # Shifts the size-limb vector at s_ptr left by cnt bits and stores the
 # result at res_ptr.  Both pointers are first advanced by 4 * size
 # bytes (the result pointer is kept in r9) and the limbs are then
 # processed from the most significant one downwards with
 # pre-decrementing loads and stores.  r8 holds 32 - cnt.  The bits
 # shifted out of the top limb are computed into r3 and form the return
 # value; the MQ register carries the bits shifted between limbs.
 #
 # NOTE(review): assumes 4-byte limbs, size >= 1 and presumably
 # 0 < cnt < 32 -- confirm against the callers.
  */
 
 	.toc
 	.extern _gcry_mpih_lshift[DS]
 	.extern ._gcry_mpih_lshift
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_lshift
 	.globl ._gcry_mpih_lshift
 	.csect _gcry_mpih_lshift[DS]
 _gcry_mpih_lshift:
 	.long ._gcry_mpih_lshift, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_lshift:
 	/* r0 = 4 * size; r9 = end of result, r4 = end of source; r8 = 32 - cnt. */
 	sli	0,5,2
 	cax	9,3,0
 	cax	4,4,0
 	sfi	8,6,32
 	mtctr	5		# put limb count in CTR loop register
 	lu	0,-4(4) 	# read most significant limb
 	sre	3,0,8		# compute carry out limb, and init MQ register
 	bdz	Lend2		# if just one limb, skip loop
 	lu	0,-4(4) 	# read 2:nd most significant limb
 	sreq	7,0,8		# compute most significant limb of result
 	bdz	Lend		# if just two limb, skip loop
 Loop:	lu	0,-4(4) 	# load next lower limb
 	stu	7,-4(9) 	# store previous result during read latency
 	sreq	7,0,8		# compute result limb
 	bdn	Loop		# loop back until CTR is zero
 Lend:	stu	7,-4(9) 	# store 2:nd least significant limb
 Lend2:	sle	7,0,6		# compute least significant limb
 	st	7,-4(9) 	# store it
 	br
 
diff --git a/mpi/power/mpih-mul1.S b/mpi/power/mpih-mul1.S
index 35034fa4..5876443c 100644
--- a/mpi/power/mpih-mul1.S
+++ b/mpi/power/mpih-mul1.S
@@ -1,115 +1,115 @@
 /* IBM POWER  mul_1 -- Multiply a limb vector with a limb and store
  *                     the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s1_ptr	r4
 # size		r5
 # s2_limb	r6
 
 # The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
 # obtain that operation, we have to use the 32x32->64 signed multiplication
 # instruction, and add the appropriate compensation to the high limb of the
 # result.  We add the multiplicand if the multiplier has its most significant
 # bit set, and we add the multiplier if the multiplicand has its most
 # significant bit set.	We need to preserve the carry flag between each
 # iteration, so we have to compute the compensation carefully (the natural,
 # srai+and doesn't work).  Since the POWER architecture has a branch unit
 # we can branch in zero cycles, so that's how we perform the additions.
 #
 # Structure of the code below:
 #  - The sign of s2_limb is tested once (cmpi 0,6,0 / blt Lneg) and selects
 #    one of two loops: Lploop when s2_limb is non-negative as a signed
 #    value, Lnloop when it is negative.  Lnloop additionally adds the
 #    current s1 limb (r0) to every high limb.
 #  - Both loops are unrolled twice and alternate between r9 and r10 for
 #    the high limb that is carried into the next step; r8 holds the low
 #    limb waiting to be stored.
 #  - Lend0 is the exit taken when the live high limb is in r10; it copies
 #    it to r9 so that the shared tail Lend can store the last low limb and
 #    return r9 plus the carry flag in r3.
 #
 # NOTE(review): size is assumed to be at least 1 (the first limb is
 # loaded and multiplied unconditionally) -- confirm against the callers.
  */
 
 	.toc
 	.csect ._gcry_mpih_mul_1[PR]
 	.align 2
 	.globl _gcry_mpih_mul_1
 	.globl ._gcry_mpih_mul_1
 	.csect _gcry_mpih_mul_1[DS]
 _gcry_mpih_mul_1:
 	.long ._gcry_mpih_mul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_mul_1[PR]
 ._gcry_mpih_mul_1:
 
 	/* First limb: r9 = high product limb (compensated), r8 = low limb. */
 	cal	3,-4(3)
 	l	0,0(4)
 	cmpi	0,6,0
 	mtctr	5
 	mul	9,0,6
 	srai	7,0,31
 	and	7,7,6
 	mfmq	8
 	ai	0,0,0		# reset carry
 	cax	9,9,7
 	blt	Lneg
 Lpos:	bdz	Lend
 Lploop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	mfmq	0
 	ae	8,0,9
 	bge	Lp0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Lp0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	mfmq	0
 	ae	8,0,10
 	bge	Lp1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Lp1:	bdn	Lploop
 	b	Lend
 
 Lneg:	cax	9,9,0
 	bdz	Lend
 Lnloop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	cax	10,10,0 	# adjust high limb for negative s2_limb
 	mfmq	0
 	ae	8,0,9
 	bge	Ln0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Ln0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	cax	9,9,0		# adjust high limb for negative s2_limb
 	mfmq	0
 	ae	8,0,10
 	bge	Ln1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Ln1:	bdn	Lnloop
 	b	Lend
 
 Lend0:	cal	9,0(10)
 Lend:	st	8,4(3)
 	aze	3,9
 	br
 
diff --git a/mpi/power/mpih-mul2.S b/mpi/power/mpih-mul2.S
index d056e8f3..5b91101e 100644
--- a/mpi/power/mpih-mul2.S
+++ b/mpi/power/mpih-mul2.S
@@ -1,130 +1,130 @@
 /* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add
  *			 the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s1_ptr	r4
 # size		r5
 # s2_limb	r6
 
 # The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
 # obtain that operation, we have to use the 32x32->64 signed multiplication
 # instruction, and add the appropriate compensation to the high limb of the
 # result.  We add the multiplicand if the multiplier has its most significant
 # bit set, and we add the multiplier if the multiplicand has its most
 # significant bit set.	We need to preserve the carry flag between each
 # iteration, so we have to compute the compensation carefully (the natural,
 # srai+and doesn't work).  Since the POWER architecture has a branch unit
 # we can branch in zero cycles, so that's how we perform the additions.
 #
 # Structure of the code below:
 #  - Every step loads the existing result limb (l 7,4(3)) and adds it to
 #    the low product limb before that limb is stored back.
 #  - The sign of s2_limb is tested once (cmpi 0,6,0 / blt Lneg) and selects
 #    one of two loops: Lploop when s2_limb is non-negative as a signed
 #    value, Lnloop when it is negative.
 #  - In Lnloop the low product limb is fetched into r7 instead of r0, so
 #    that r0 still holds the s1 limb; "ae hi,hi,0" then adds that limb
 #    (the adjustment for a negative s2_limb) and the carry in one go.
 #  - Both loops are unrolled twice and alternate between r9 and r10 for
 #    the high limb that is carried into the next step.
 #  - Lend0 is the exit taken when the live high limb is in r10; it copies
 #    it to r9 so that the shared tail Lend can store the last limb and
 #    return r9 plus the carry flag in r3.
 #
 # NOTE(review): size is assumed to be at least 1 (the first limb is
 # loaded and multiplied unconditionally) -- confirm against the callers.
  */
 
 	.toc
 	.csect ._gcry_mpih_addmul_1[PR]
 	.align 2
 	.globl _gcry_mpih_addmul_1
 	.globl ._gcry_mpih_addmul_1
 	.csect _gcry_mpih_addmul_1[DS]
 _gcry_mpih_addmul_1:
 	.long ._gcry_mpih_addmul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_addmul_1[PR]
 ._gcry_mpih_addmul_1:
 
 	/* First limb: r9 = high product limb (compensated), r8 = low limb. */
 	cal	3,-4(3)
 	l	0,0(4)
 	cmpi	0,6,0
 	mtctr	5
 	mul	9,0,6
 	srai	7,0,31
 	and	7,7,6
 	mfmq	8
 	cax	9,9,7
 	l	7,4(3)
 	a	8,8,7		# add res_limb
 	blt	Lneg
 Lpos:	bdz	Lend
 
 Lploop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	mfmq	0
 	ae	8,0,9		# low limb + old_cy_limb + old cy
 	l	7,4(3)
 	aze	10,10		# propagate cy to new cy_limb
 	a	8,8,7		# add res_limb
 	bge	Lp0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Lp0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	mfmq	0
 	ae	8,0,10
 	l	7,4(3)
 	aze	9,9
 	a	8,8,7
 	bge	Lp1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Lp1:	bdn	Lploop
 
 	b	Lend
 
 Lneg:	cax	9,9,0
 	bdz	Lend
 Lnloop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	mfmq	7
 	ae	8,7,9
 	l	7,4(3)
 	ae	10,10,0 	# propagate cy to new cy_limb
 	a	8,8,7		# add res_limb
 	bge	Ln0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Ln0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	mfmq	7
 	ae	8,7,10
 	l	7,4(3)
 	ae	9,9,0		# propagate cy to new cy_limb
 	a	8,8,7		# add res_limb
 	bge	Ln1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Ln1:	bdn	Lnloop
 	b	Lend
 
 Lend0:	cal	9,0(10)
 Lend:	st	8,4(3)
 	aze	3,9
 	br
 
diff --git a/mpi/power/mpih-mul3.S b/mpi/power/mpih-mul3.S
index 8bc317b7..8d54e84e 100644
--- a/mpi/power/mpih-mul3.S
+++ b/mpi/power/mpih-mul3.S
@@ -1,135 +1,135 @@
 /* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract
  *			 the result from a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*
 
 # INPUT PARAMETERS
 # res_ptr	r3
 # s1_ptr	r4
 # size		r5
 # s2_limb	r6
 
 # The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
 # obtain that operation, we have to use the 32x32->64 signed multiplication
 # instruction, and add the appropriate compensation to the high limb of the
 # result.  We add the multiplicand if the multiplier has its most significant
 # bit set, and we add the multiplier if the multiplicand has its most
 # significant bit set.	We need to preserve the carry flag between each
 # iteration, so we have to compute the compensation carefully (the natural,
 # srai+and doesn't work).  Since the POWER architecture has a branch unit
 # we can branch in zero cycles, so that's how we perform the additions.
 #
 # Structure of the code below:
 #  - Every step loads the existing result limb (l 7,4(3)) and subtracts
 #    the low product limb (kept in r11) from it with sf; the following
 #    "a 11,8,11" only serves to invert the carry flag left by sf, its
 #    register result is discarded.
 #  - The sign of s2_limb is tested once (cmpi 0,6,0 / blt Lneg) and selects
 #    one of two loops: Lploop when s2_limb is non-negative as a signed
 #    value, Lnloop when it is negative.
 #  - In Lnloop the low product limb is fetched into r7 instead of r0, so
 #    that r0 still holds the s1 limb; "ae hi,hi,0" then adds that limb
 #    (the adjustment for a negative s2_limb) and the carry in one go.
 #  - Both loops are unrolled twice and alternate between r9 and r10 for
 #    the high limb that is carried into the next step.
 #  - Lend0 is the exit taken when the live high limb is in r10; it copies
 #    it to r9 so that the shared tail Lend can store the last limb and
 #    return r9 plus the carry flag in r3.
 #
 # NOTE(review): size is assumed to be at least 1 (the first limb is
 # loaded and multiplied unconditionally) -- confirm against the callers.
  */
 
 	.toc
 	.csect ._gcry_mpih_submul_1[PR]
 	.align 2
 	.globl _gcry_mpih_submul_1
 	.globl ._gcry_mpih_submul_1
 	.csect _gcry_mpih_submul_1[DS]
 _gcry_mpih_submul_1:
 	.long ._gcry_mpih_submul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_submul_1[PR]
 ._gcry_mpih_submul_1:
 
 	/* First limb: r9 = high product limb (compensated), r11 = low limb. */
 	cal	3,-4(3)
 	l	0,0(4)
 	cmpi	0,6,0
 	mtctr	5
 	mul	9,0,6
 	srai	7,0,31
 	and	7,7,6
 	mfmq	11
 	cax	9,9,7
 	l	7,4(3)
 	sf	8,11,7		# subtract from res_limb
 	a	11,8,11 	# invert cy (r11 is junk)
 	blt	Lneg
 Lpos:	bdz	Lend
 
 Lploop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	mfmq	0
 	ae	11,0,9		# low limb + old_cy_limb + old cy
 	l	7,4(3)
 	aze	10,10		# propagate cy to new cy_limb
 	sf	8,11,7		# subtract from res_limb
 	a	11,8,11 	# invert cy (r11 is junk)
 	bge	Lp0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Lp0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	mfmq	0
 	ae	11,0,10
 	l	7,4(3)
 	aze	9,9
 	sf	8,11,7
 	a	11,8,11 	# invert cy (r11 is junk)
 	bge	Lp1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Lp1:	bdn	Lploop
 
 	b	Lend
 
 Lneg:	cax	9,9,0
 	bdz	Lend
 Lnloop: lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	10,0,6
 	mfmq	7
 	ae	11,7,9
 	l	7,4(3)
 	ae	10,10,0 	# propagate cy to new cy_limb
 	sf	8,11,7		# subtract from res_limb
 	a	11,8,11 	# invert cy (r11 is junk)
 	bge	Ln0
 	cax	10,10,6 	# adjust high limb for negative limb from s1
 Ln0:	bdz	Lend0
 	lu	0,4(4)
 	stu	8,4(3)
 	cmpi	0,0,0
 	mul	9,0,6
 	mfmq	7
 	ae	11,7,10
 	l	7,4(3)
 	ae	9,9,0		# propagate cy to new cy_limb
 	sf	8,11,7		# subtract from res_limb
 	a	11,8,11 	# invert cy (r11 is junk)
 	bge	Ln1
 	cax	9,9,6		# adjust high limb for negative limb from s1
 Ln1:	bdn	Lnloop
 	b	Lend
 
 Lend0:	cal	9,0(10)
 Lend:	st	8,4(3)
 	aze	3,9
 	br
 
diff --git a/mpi/power/mpih-rshift.S b/mpi/power/mpih-rshift.S
index f131a86d..aca51abf 100644
--- a/mpi/power/mpih-rshift.S
+++ b/mpi/power/mpih-rshift.S
@@ -1,64 +1,64 @@
 /* IBM POWER rshift
  *
  *      Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s_ptr 	r4
 # size		r5
 # cnt		r6
 #
 # Shifts the size-limb vector at s_ptr right by cnt bits and stores the
 # result at res_ptr.  The limbs are processed from the least significant
 # one upwards with pre-incrementing loads and stores; the result pointer
 # is kept in r9, offset by -4 to suit the update-form stores.  r8 holds
 # 32 - cnt.  The bits shifted out of the lowest limb are computed into
 # r3 and form the return value; the MQ register carries the bits shifted
 # between limbs.
 #
 # NOTE(review): assumes 4-byte limbs, size >= 1 and presumably
 # 0 < cnt < 32 -- confirm against the callers.
 */
 
 	.toc
 	.extern _gcry_mpih_rshift[DS]
 	.extern ._gcry_mpih_rshift
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_rshift
 	.globl ._gcry_mpih_rshift
 	.csect _gcry_mpih_rshift[DS]
 _gcry_mpih_rshift:
 	.long ._gcry_mpih_rshift, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_rshift:
 	/* r8 = 32 - cnt, the complementary shift count. */
 	sfi	8,6,32
 	mtctr	5		# put limb count in CTR loop register
 	l	0,0(4)		# read least significant limb
 	ai	9,3,-4		# adjust res_ptr since it's offset in the stu:s
 	sle	3,0,8		# compute carry limb, and init MQ register
 	bdz	Lend2		# if just one limb, skip loop
 	lu	0,4(4)		# read 2:nd least significant limb
 	sleq	7,0,8		# compute least significant limb of result
 	bdz	Lend		# if just two limb, skip loop
 Loop:	lu	0,4(4)		# load next higher limb
 	stu	7,4(9)		# store previous result during read latency
 	sleq	7,0,8		# compute result limb
 	bdn	Loop		# loop back until CTR is zero
 Lend:	stu	7,4(9)		# store 2:nd most significant limb
 Lend2:	sre	7,0,6		# compute most significant limb
 	st	7,4(9)		# store it
 	br
 
 
diff --git a/mpi/power/mpih-sub1.S b/mpi/power/mpih-sub1.S
index 02748fc5..03a8d9da 100644
--- a/mpi/power/mpih-sub1.S
+++ b/mpi/power/mpih-sub1.S
@@ -1,88 +1,88 @@
 /* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length.
  *
  *      Copyright (C) 1992, 1994, 1995, 1996, 1999,
  *                    2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 /*
 # INPUT PARAMETERS
 # res_ptr	r3
 # s1_ptr	r4
 # s2_ptr	r5
 # size		r6
 #
 # Subtracts the limb vector at s2_ptr from the one at s1_ptr (size limbs
 # each) and stores the difference at res_ptr.  The return value in r3 is
 # derived from the inverted carry flag by the final sfe/sfi pair (see
 # the "load !cy" comments), i.e. it reports the borrow.
 #
 # The main loop is unrolled twice, so CTR is loaded with size / 2.  The
 # least significant limbs are always subtracted up front (into r7).  For
 # an odd size that first difference is either the whole job (size == 1)
 # or it is stored at L1 and the next pair is subtracted before joining
 # the even path.  Each difference is stored one step after it is
 # computed, so r7 and r11 hold pending results on entry to Lend.
 #
 # NOTE(review): size is assumed to be non-zero (the first limbs are
 # loaded unconditionally), as stated in the file title.
  */
 
 	.toc
 	.extern _gcry_mpih_sub_n[DS]
 	.extern ._gcry_mpih_sub_n
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_sub_n
 	.globl ._gcry_mpih_sub_n
 	.csect _gcry_mpih_sub_n[DS]
 _gcry_mpih_sub_n:
 	.long ._gcry_mpih_sub_n, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_sub_n:
 	andil.	10,6,1		# odd or even number of limbs?
 	l	8,0(4)		# load least significant s1 limb
 	l	0,0(5)		# load least significant s2 limb
 	cal	3,-4(3) 	# offset res_ptr, it's updated before it's used
 	sri	10,6,1		# count for unrolled loop
 	sf	7,0,8		# subtract least significant limbs, set cy
 	mtctr	10		# copy count into CTR
 	beq	0,Leven 	# branch if even # of limbs (# of limbs >= 2)
 
 # We have an odd # of limbs.  Subtract the first limbs separately.
 	cmpi	1,10,0		# is count for unrolled loop zero?
 	bne	1,L1		# branch if not
 	st	7,4(3)
 	sfe	3,0,0		# load !cy into ...
 	sfi	3,3,0		# ... return value register
 	br			# return
 
 # We subtracted least significant limbs.  Now reload the next limbs to enter loop.
 L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
 	lu	0,4(5)		# load s2 limb and update s2_ptr
 	stu	7,4(3)
 	sfe	7,0,8		# subtract limbs, set cy
 Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
 	lu	10,4(5) 	# load s2 limb and update s2_ptr
 	bdz	Lend		# If done, skip loop
 
 Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
 	lu	0,4(5)		# load s2 limb and update s2_ptr
 	sfe	11,10,9 	# subtract previous limbs with cy, set cy
 	stu	7,4(3)		#
 	lu	9,4(4)		# load s1 limb and update s1_ptr
 	lu	10,4(5) 	# load s2 limb and update s2_ptr
 	sfe	7,0,8		# subtract previous limbs with cy, set cy
 	stu	11,4(3) 	#
 	bdn	Loop		# decrement CTR and loop back
 
 Lend:	sfe	11,10,9 	# subtract limbs with cy, set cy
 	st	7,4(3)		#
 	st	11,8(3) 	#
 	sfe	3,0,0		# load !cy into ...
 	sfi	3,3,0		# ... return value register
 	br
 
diff --git a/mpi/powerpc32/mpih-add1.S b/mpi/powerpc32/mpih-add1.S
index 1661f5e6..cd592943 100644
--- a/mpi/powerpc32/mpih-add1.S
+++ b/mpi/powerpc32/mpih-add1.S
@@ -1,136 +1,137 @@
 /* PowerPC-32 add_n -- Add two limb vectors of equal, non-zero length.
  *
  * Copyright (C) 1992, 1994, 1995, 1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	(r3)
  *		   mpi_ptr_t s1_ptr,	(r4)
  *		   mpi_ptr_t s2_ptr,	(r5)
  *		   mpi_size_t size)	(r6)
  *
  * Add the limb vectors at S1_PTR and S2_PTR (SIZE limbs each), store
  * the sum at RES_PTR and return the final carry in r3.
  *
  * This variant is a plain one-limb-per-iteration loop driven by CTR.
  * The least significant limbs are added before the loop; inside the
  * loop the previous sum is stored while the next pair is being loaded,
  * so one sum (r7) is still pending at Lend.
  *
  * NOTE(review): SIZE is assumed to be non-zero (the first limbs are
  * loaded unconditionally), as stated in the file title.
  */
 
 	.toc
 	.extern _gcry_mpih_add_n[DS]
 	.extern ._gcry_mpih_add_n
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_add_n
 	.globl ._gcry_mpih_add_n
 	.csect _gcry_mpih_add_n[DS]
 _gcry_mpih_add_n:
 	.long ._gcry_mpih_add_n, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_add_n:
 	mtctr	6		# copy size into CTR
 	lwz	8,0(4)		# load least significant s1 limb
 	lwz	0,0(5)		# load least significant s2 limb
 	addi	3,3,-4		# offset res_ptr, it is updated before used
 	addc	7,0,8		# add least significant limbs, set cy
 	bdz	Lend		# If done, skip loop
 Loop:	lwzu	8,4(4)		# load s1 limb and update s1_ptr
 	lwzu	0,4(5)		# load s2 limb and update s2_ptr
 	stwu	7,4(3)		# store previous limb in load latency slot
 	adde	7,0,8		# add new limbs with cy, set cy
 	bdnz	Loop		# decrement CTR and loop back
 Lend:	stw	7,4(3)		# store ultimate result limb
 	li	3,0		# load cy into ...
 	addze	3,3		# ... return value register
 	blr
 
 #else
 /* Add two limb vectors of equal, non-zero length for PowerPC.
    Copyright (C) 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 /* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
 			mp_size_t size)
    Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1.  */
 
 /* Note on optimisation: This code is optimal for the 601.  Almost every other
    possible 2-unrolled inner loop will not be.	Also, watch out for the
    alignment...  */
 
 /* Register use: r3 = res_ptr, r4 = s1_ptr, r5 = s2_ptr, r6 = size on
    entry.  The loop handles two limbs per iteration, so CTR is loaded
    with size / 2.  The low bits of size are also copied into the
    condition register so that "bt 31" can test whether size is odd:
      - even size: the carry is cleared and the three pointers are moved
        back by one limb, so that the loop's offsets 4 and 8 address
        limbs 0 and 1;
      - odd size: limb 0 is added separately at label 2, and the loop is
        skipped altogether when size / 2 is zero (the "beq" uses the
        condition set by "srwi.").
    r10 is kept at zero so that the final addze returns just the carry.  */
 
 EALIGN(_gcry_mpih_add_n,3,0)
 /* Set up for loop below.  */
        mtcrf 0x01,%r6
        srwi. %r7,%r6,1
        li    %r10,0
        mtctr %r7
        bt    31,2f
 
 /* Clear the carry.  */
        addic %r0,%r0,0
 /* Adjust pointers for loop.  */
        addi  %r3,%r3,-4
        addi  %r4,%r4,-4
        addi  %r5,%r5,-4
        b     0f
 
 /* Odd size: add the least significant limbs on their own.  */
 2:     lwz  %r7,0(%r5)
        lwz  %r6,0(%r4)
        addc %r6,%r6,%r7
        stw  %r6,0(%r3)
 	beq  1f
 
 /* The loop.  */
 
 /* Align start of loop to an odd word boundary to guarantee that the
    last two words can be fetched in one access (for 601).  */
 0:     lwz  %r9,4(%r4)
        lwz  %r8,4(%r5)
        lwzu %r6,8(%r4)
        lwzu %r7,8(%r5)
        adde %r8,%r9,%r8
        stw  %r8,4(%r3)
        adde %r6,%r6,%r7
        stwu %r6,8(%r3)
        bdnz 0b
 /* Return the carry.  */
 1:     addze %r3,%r10
        blr
 END(_gcry_mpih_add_n)
 #endif
 
diff --git a/mpi/powerpc32/mpih-lshift.S b/mpi/powerpc32/mpih-lshift.S
index 6231095d..0f66a403 100644
--- a/mpi/powerpc32/mpih-lshift.S
+++ b/mpi/powerpc32/mpih-lshift.S
@@ -1,198 +1,198 @@
 /* PowerPC-32 lshift
  *
  *      Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_lshift( mpi_ptr_t wp,	(r3)
  *		   mpi_ptr_t up,	(r4)
  *		   mpi_size_t usize,	(r5)
  *		   unsigned cnt)	(r6)
  */
 
 	.toc
 .csect	.text[PR]
 	.align	2
 	.globl	_gcry_mpih_lshift
 	.globl	._gcry_mpih_lshift
 	.csect	_gcry_mpih_lshift[DS]
 _gcry_mpih_lshift:
 	.long	._gcry_mpih_lshift,  TOC[tc0],	  0
 	.csect	.text[PR]
 ._gcry_mpih_lshift:
 	mtctr	5		# copy size into CTR
 	slwi	0,5,2
 	add	7,3,0		# make r7 point at end of res
 	add	4,4,0		# make r4 point at end of s1
 	subfic	8,6,32
 	lwzu	11,-4(4)	# load first s1 limb
 	srw	3,11,8		# compute function return value
 	bdz	Lend1
 
 Loop:	lwzu	10,-4(4)
 	slw	9,11,6
 	srw	12,10,8
 	or	9,9,12
 	stwu	9,-4(7)
 	bdz	Lend2
 	lwzu	11,-4(4)
 	slw	9,10,6
 	srw	12,11,8
 	or	9,9,12
 	stwu	9,-4(7)
 	bdnz	Loop
 
 Lend1:	slw	0,11,6
 	stw	0,-4(7)
 	blr
 
 Lend2:	slw	0,10,6
 	stw	0,-4(7)
 	blr
 
 #else
 /* Shift a limb left, low level routine.
    Copyright (C) 1996, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
+   see <https://www.gnu.org/licenses/>.
+*/
 
 /* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
 			unsigned int cnt)  */
 
 EALIGN(_gcry_mpih_lshift,3,0)
        mtctr   %r5	       # copy size into CTR
        cmplwi  %cr0,%r5,16     # is size < 16
        slwi    %r0,%r5,2
        add     %r7,%r3,%r0     # make r7 point at end of res
        add     %r4,%r4,%r0     # make r4 point at end of s1
        lwzu    %r11,-4(%r4)    # load first s1 limb
        subfic  %r8,%r6,32
        srw     %r3,%r11,%r8    # compute function return value
        bge     %cr0,L(big)     # branch if size >= 16
 
        bdz     L(end1)
 
 0:     lwzu    %r10,-4(%r4)
        slw     %r9,%r11,%r6
        srw     %r12,%r10,%r8
        or      %r9,%r9,%r12
        stwu    %r9,-4(%r7)
        bdz     L(end2)
        lwzu    %r11,-4(%r4)
        slw     %r9,%r10,%r6
        srw     %r12,%r11,%r8
        or      %r9,%r9,%r12
        stwu    %r9,-4(%r7)
        bdnz    0b
 
 L(end1):slw    %r0,%r11,%r6
        stw     %r0,-4(%r7)
        blr
 
 
 /* Guaranteed not to succeed.  */
 L(boom): tweq	 %r0,%r0
 
 /* We imitate a case statement, by using (yuk!) fixed-length code chunks,
    of size 4*12 bytes.	We have to do this (or something) to make this PIC.  */
 L(big):        mflr    %r9
        bltl-   %cr0,L(boom)    # Never taken, only used to set LR.
        slwi    %r10,%r6,4
        mflr    %r12
        add     %r10,%r12,%r10
        slwi    %r8,%r6,5
        add     %r10,%r8,%r10
        mtctr   %r10
        addi    %r5,%r5,-1
        mtlr    %r9
        bctr
 
 L(end2):slw    %r0,%r10,%r6
        stw     %r0,-4(%r7)
        blr
 
 #define DO_LSHIFT(n) \
        mtctr   %r5;						       \
 0:     lwzu    %r10,-4(%r4);					       \
        slwi    %r9,%r11,n;					       \
        inslwi  %r9,%r10,n,32-n; 				       \
        stwu    %r9,-4(%r7);					       \
        bdz-    L(end2); 					       \
        lwzu    %r11,-4(%r4);					       \
        slwi    %r9,%r10,n;					       \
        inslwi  %r9,%r11,n,32-n; 				       \
        stwu    %r9,-4(%r7);					       \
        bdnz    0b;						       \
        b       L(end1)
 
        DO_LSHIFT(1)
        DO_LSHIFT(2)
        DO_LSHIFT(3)
        DO_LSHIFT(4)
        DO_LSHIFT(5)
        DO_LSHIFT(6)
        DO_LSHIFT(7)
        DO_LSHIFT(8)
        DO_LSHIFT(9)
        DO_LSHIFT(10)
        DO_LSHIFT(11)
        DO_LSHIFT(12)
        DO_LSHIFT(13)
        DO_LSHIFT(14)
        DO_LSHIFT(15)
        DO_LSHIFT(16)
        DO_LSHIFT(17)
        DO_LSHIFT(18)
        DO_LSHIFT(19)
        DO_LSHIFT(20)
        DO_LSHIFT(21)
        DO_LSHIFT(22)
        DO_LSHIFT(23)
        DO_LSHIFT(24)
        DO_LSHIFT(25)
        DO_LSHIFT(26)
        DO_LSHIFT(27)
        DO_LSHIFT(28)
        DO_LSHIFT(29)
        DO_LSHIFT(30)
        DO_LSHIFT(31)
 
 END(_gcry_mpih_lshift)
 #endif
diff --git a/mpi/powerpc32/mpih-mul1.S b/mpi/powerpc32/mpih-mul1.S
index bd418f7e..75ff66af 100644
--- a/mpi/powerpc32/mpih-mul1.S
+++ b/mpi/powerpc32/mpih-mul1.S
@@ -1,120 +1,120 @@
 /* PowerPC-32 mul_1 -- Multiply a limb vector with a limb and store
  *		       the result in a second limb vector.
  *
  *      Copyright (C) 1992, 1993, 1994, 1995,
  *                    1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,	(r3)
  *		  mpi_ptr_t s1_ptr,	(r4)
  *		  mpi_size_t s1_size,	(r5)
  *		  mpi_limb_t s2_limb)	(r6)
  *
  * This is a fairly straightforward implementation.  The timing of the PC601
  * is hard to understand, so I will wait to optimize this until I have some
  * hardware to play with.
  *
  * The code trivially generalizes to 64 bit limbs for the PC620.
  */
 
 	.toc
 	.csect ._gcry_mpih_mul_1[PR]
 	.align 2
 	.globl _gcry_mpih_mul_1
 	.globl ._gcry_mpih_mul_1
 	.csect _gcry_mpih_mul_1[DS]
 _gcry_mpih_mul_1:
 	.long ._gcry_mpih_mul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_mul_1[PR]
 ._gcry_mpih_mul_1:
 	mtctr	5
 
 	lwz	0,0(4)
 	mullw	7,0,6
 	mulhwu	10,0,6
 	addi	3,3,-4		# adjust res_ptr
 	addic	5,5,0		# clear cy with dummy insn
 	bdz	Lend
 
 Loop:	lwzu	0,4(4)
 	stwu	7,4(3)
 	mullw	8,0,6
 	adde	7,8,10
 	mulhwu	10,0,6
 	bdnz	Loop
 
 Lend:	stw	7,4(3)
 	addze	3,10
 	blr
 
 #else
 /* Multiply a limb vector by a limb, for PowerPC.
    Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
-
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
 			mp_size_t s1_size, mp_limb_t s2_limb)
    Calculate s1*s2 and put result in res_ptr; return carry.  */
 
 ENTRY(_gcry_mpih_mul_1)
        mtctr   %r5
 
        lwz     %r0,0(%r4)
        mullw   %r7,%r0,%r6
        mulhwu  %r10,%r0,%r6
        addi    %r3,%r3,-4	       # adjust res_ptr
        addic   %r5,%r5,0	       # clear cy with dummy insn
        bdz     1f
 
 0:     lwzu    %r0,4(%r4)
        stwu    %r7,4(%r3)
        mullw   %r8,%r0,%r6
        adde    %r7,%r8,%r10
        mulhwu  %r10,%r0,%r6
        bdnz    0b
 
 1:     stw     %r7,4(%r3)
        addze   %r3,%r10
        blr
 END(_gcry_mpih_mul_1)
 #endif
diff --git a/mpi/powerpc32/mpih-mul2.S b/mpi/powerpc32/mpih-mul2.S
index 1d97b81a..bb3c81d5 100644
--- a/mpi/powerpc32/mpih-mul2.S
+++ b/mpi/powerpc32/mpih-mul2.S
@@ -1,127 +1,127 @@
 /* PowerPC-32 addmul_1 -- Multiply a limb vector with a limb and add
  *			  the result to a second limb vector.
  *
  *      Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (r3)
  *		     mpi_ptr_t s1_ptr,	     (r4)
  *		     mpi_size_t s1_size,     (r5)
  *		     mpi_limb_t s2_limb)     (r6)
  *
  * This is a fairly straightforward implementation.  The timing of the PC601
  * is hard to understand, so I will wait to optimize this until I have some
  * hardware to play with.
  *
  * The code trivially generalizes to 64 bit limbs for the PC620.
  */
 
 
 	.toc
 	.csect ._gcry_mpih_addmul_1[PR]
 	.align 2
 	.globl _gcry_mpih_addmul_1
 	.globl ._gcry_mpih_addmul_1
 	.csect _gcry_mpih_addmul_1[DS]
 _gcry_mpih_addmul_1:
 	.long ._gcry_mpih_addmul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_addmul_1[PR]
 ._gcry_mpih_addmul_1:
 	mtctr	5
 
 	lwz	0,0(4)
 	mullw	7,0,6
 	mulhwu	10,0,6
 	lwz	9,0(3)
 	addc	8,7,9
 	addi	3,3,-4
 	bdz	Lend
 
 Loop:	lwzu	0,4(4)
 	stwu	8,4(3)
 	mullw	8,0,6
 	adde	7,8,10
 	mulhwu	10,0,6
 	lwz	9,4(3)
 	addze	10,10
 	addc	8,7,9
 	bdnz	Loop
 
 Lend:	stw	8,4(3)
 	addze	3,10
 	blr
 
 #else
 /* Multiply a limb vector by a single limb, for PowerPC.
    Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
-
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
 			   mp_size_t s1_size, mp_limb_t s2_limb)
    Calculate res+s1*s2 and put result back in res; return carry.  */
 ENTRY(_gcry_mpih_addmul_1)
        mtctr   %r5
 
        lwz     %r0,0(%r4)
        mullw   %r7,%r0,%r6
        mulhwu  %r10,%r0,%r6
        lwz     %r9,0(%r3)
        addc    %r8,%r7,%r9
        addi    %r3,%r3,-4	       /* adjust res_ptr */
        bdz     1f
 
 0:     lwzu    %r0,4(%r4)
        stwu    %r8,4(%r3)
        mullw   %r8,%r0,%r6
        adde    %r7,%r8,%r10
        mulhwu  %r10,%r0,%r6
        lwz     %r9,4(%r3)
        addze   %r10,%r10
        addc    %r8,%r7,%r9
        bdnz    0b
 
 1:     stw     %r8,4(%r3)
        addze   %r3,%r10
        blr
 END(_gcry_mpih_addmul_1)
 #endif
diff --git a/mpi/powerpc32/mpih-mul3.S b/mpi/powerpc32/mpih-mul3.S
index c410dbb0..196fca1c 100644
--- a/mpi/powerpc32/mpih-mul3.S
+++ b/mpi/powerpc32/mpih-mul3.S
@@ -1,130 +1,131 @@
 /* PowerPC-32 submul_1 -- Multiply a limb vector with a limb and subtract
  *			  the result from a second limb vector.
  *
  *      Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (r3)
  *		     mpi_ptr_t s1_ptr,	     (r4)
  *		     mpi_size_t s1_size,     (r5)
  *		     mpi_limb_t s2_limb)     (r6)
  *
  * This is a fairly straightforward implementation.  The timing of the PC601
  * is hard to understand, so I will wait to optimize this until I have some
  * hardware to play with.
  *
  * The code trivially generalizes to 64 bit limbs for the PC620.
  */
 
 	.toc
 	.csect ._gcry_mpih_submul_1[PR]
 	.align 2
 	.globl _gcry_mpih_submul_1
 	.globl ._gcry_mpih_submul_1
 	.csect _gcry_mpih_submul_1[DS]
 _gcry_mpih_submul_1:
 	.long ._gcry_mpih_submul_1[PR], TOC[tc0], 0
 	.csect ._gcry_mpih_submul_1[PR]
 ._gcry_mpih_submul_1:
 	mtctr	5
 
 	lwz	0,0(4)
 	mullw	7,0,6
 	mulhwu	10,0,6
 	lwz	9,0(3)
 	subfc	8,7,9
 	addc	7,7,8		# invert cy (r7 is junk)
 	addi	3,3,-4
 	bdz	Lend
 
 Loop:	lwzu	0,4(4)
 	stwu	8,4(3)
 	mullw	8,0,6
 	adde	7,8,10
 	mulhwu	10,0,6
 	lwz	9,4(3)
 	addze	10,10
 	subfc	8,7,9
 	addc	7,7,8		# invert cy (r7 is junk)
 	bdnz	Loop
 
 Lend:	stw	8,4(3)
 	addze	3,10
 	blr
 
 #else
 /* Multiply a limb vector by a single limb, for PowerPC.
    Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
 			   mp_size_t s1_size, mp_limb_t s2_limb)
    Calculate res-s1*s2 and put result back in res; return carry.  */
 
 ENTRY(_gcry_mpih_submul_1)
        mtctr   %r5
 
        lwz     %r0,0(%r4)
        mullw   %r7,%r0,%r6
        mulhwu  %r10,%r0,%r6
        lwz     %r9,0(%r3)
        subf    %r8,%r7,%r9
        addc    %r7,%r7,%r8	       # invert cy (r7 is junk)
        addi    %r3,%r3,-4	       # adjust res_ptr
        bdz     1f
 
 0:     lwzu    %r0,4(%r4)
        stwu    %r8,4(%r3)
        mullw   %r8,%r0,%r6
        adde    %r7,%r8,%r10
        mulhwu  %r10,%r0,%r6
        lwz     %r9,4(%r3)
        addze   %r10,%r10
        subf    %r8,%r7,%r9
        addc    %r7,%r7,%r8	       # invert cy (r7 is junk)
        bdnz    0b
 
 1:     stw     %r8,4(%r3)
        addze   %r3,%r10
        blr
 END(_gcry_mpih_submul_1)
 #endif
diff --git a/mpi/powerpc32/mpih-rshift.S b/mpi/powerpc32/mpih-rshift.S
index 98349edb..f6702ad1 100644
--- a/mpi/powerpc32/mpih-rshift.S
+++ b/mpi/powerpc32/mpih-rshift.S
@@ -1,131 +1,131 @@
 /* PowerPC-32 rshift
  *
  *      Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  * mpi_limb_t
  * _gcry_mpih_rshift( mpi_ptr_t wp,	(r3)
  *		   mpi_ptr_t up,	(r4)
  *		   mpi_size_t usize,	(r5)
  *		   unsigned cnt)	(r6)
  */
 
 	.toc
 .csect	.text[PR]
 	.align	2
 	.globl	_gcry_mpih_rshift
 	.globl	._gcry_mpih_rshift
 	.csect	_gcry_mpih_rshift[DS]
 _gcry_mpih_rshift:
 	.long	._gcry_mpih_rshift,  TOC[tc0],	  0
 	.csect	.text[PR]
 ._gcry_mpih_rshift:
 	mtctr	5		# copy size into CTR
 	addi	7,3,-4		# move adjusted res_ptr to free return reg
 	subfic	8,6,32
 	lwz	11,0(4) 	# load first s1 limb
 	slw	3,11,8		# compute function return value
 	bdz	Lend1
 
 Loop:	lwzu	10,4(4)
 	srw	9,11,6
 	slw	12,10,8
 	or	9,9,12
 	stwu	9,4(7)
 	bdz	Lend2
 	lwzu	11,4(4)
 	srw	9,10,6
 	slw	12,11,8
 	or	9,9,12
 	stwu	9,4(7)
 	bdnz	Loop
 
 Lend1:	srw	0,11,6
 	stw	0,4(7)
 	blr
 
 Lend2:	srw	0,10,6
 	stw	0,4(7)
 	blr
 
 #else
 /* Shift a limb right, low level routine.
    Copyright (C) 1995, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
-
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* INPUT PARAMETERS
    res_ptr     r3
    s1_ptr      r4
    size 	       r5
    cnt	       r6  */
 
 ENTRY(_gcry_mpih_rshift)
        mtctr   5	       # copy size into CTR
        addi    7,3,-4	       # move adjusted res_ptr to free return reg
        subfic  8,6,32
        lwz     11,0(4)	       # load first s1 limb
        slw     3,11,8	       # compute function return value
        bdz     1f
 
 0:     lwzu    10,4(4)
        srw     9,11,6
        slw     12,10,8
        or      9,9,12
        stwu    9,4(7)
        bdz     2f
        lwzu    11,4(4)
        srw     9,10,6
        slw     12,11,8
        or      9,9,12
        stwu    9,4(7)
        bdnz    0b
 
 1:     srw     0,11,6
        stw     0,4(7)
        blr
 
 2:     srw     0,10,6
        stw     0,4(7)
        blr
 END(_gcry_mpih_rshift)
 #endif
diff --git a/mpi/powerpc32/mpih-sub1.S b/mpi/powerpc32/mpih-sub1.S
index d612ea89..42672423 100644
--- a/mpi/powerpc32/mpih-sub1.S
+++ b/mpi/powerpc32/mpih-sub1.S
@@ -1,133 +1,134 @@
 /* PowerPC-32  sub_n -- Subtract two limb vectors of the same length > 0
  *			and store difference in a third limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998,
  *                    2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "sysdep.h"
 #include "asm-syntax.h"
 
 
 #ifndef USE_PPC_PATCHES
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	(r3)
  *		   mpi_ptr_t s1_ptr,	(r4)
  *		   mpi_ptr_t s2_ptr,	(r5)
  *		   mpi_size_t size)	(r6)
  */
 
 	.toc
 	.extern _gcry_mpih_sub_n[DS]
 	.extern ._gcry_mpih_sub_n
 .csect [PR]
 	.align 2
 	.globl _gcry_mpih_sub_n
 	.globl ._gcry_mpih_sub_n
 	.csect _gcry_mpih_sub_n[DS]
 _gcry_mpih_sub_n:
 	.long ._gcry_mpih_sub_n, TOC[tc0], 0
 	.csect [PR]
 ._gcry_mpih_sub_n:
 	mtctr	6		# copy size into CTR
 	lwz	8,0(4)		# load least significant s1 limb
 	lwz	0,0(5)		# load least significant s2 limb
 	addi	3,3,-4		# offset res_ptr, it is updated before used
 	subfc	7,0,8		# add least significant limbs, set cy
 	bdz	Lend		# If done, skip loop
 Loop:	lwzu	8,4(4)		# load s1 limb and update s1_ptr
 	lwzu	0,4(5)		# load s2 limb and update s2_ptr
 	stwu	7,4(3)		# store previous limb in load latency slot
 	subfe	7,0,8		# add new limbs with cy, set cy
 	bdnz	Loop		# decrement CTR and loop back
 Lend:	stw	7,4(3)		# store ultimate result limb
 	subfe	3,0,0		# load !cy into ...
 	subfic	3,3,0		# ... return value register
 	blr
 
 #else
 /* Subtract two limb vectors of equal, non-zero length for PowerPC.
    Copyright (C) 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.1-or-later
+*/
 
 /* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
 			mp_size_t size)
    Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.  */
 
 /* Note on optimisation: This code is optimal for the 601.  Almost every other
    possible 2-unrolled inner loop will not be.	Also, watch out for the
    alignment...  */
 
 EALIGN(_gcry_mpih_sub_n,3,1)
 /* Set up for loop below.  */
        mtcrf 0x01,%r6
        srwi. %r7,%r6,1
        mtctr %r7
        bt    31,2f
 
 /* Set the carry (clear the borrow).  */
        subfc %r0,%r0,%r0
 /* Adjust pointers for loop.  */
        addi  %r3,%r3,-4
        addi  %r4,%r4,-4
        addi  %r5,%r5,-4
        b     0f
 
 2:     lwz   %r7,0(%r5)
        lwz   %r6,0(%r4)
        subfc %r6,%r7,%r6
        stw   %r6,0(%r3)
 	beq   1f
 
 /* Align start of loop to an odd word boundary to guarantee that the
    last two words can be fetched in one access (for 601).  This turns
    out to be important.  */
 0:
        lwz   %r9,4(%r4)
        lwz   %r8,4(%r5)
        lwzu  %r6,8(%r4)
        lwzu  %r7,8(%r5)
        subfe %r8,%r8,%r9
        stw   %r8,4(%r3)
        subfe %r6,%r7,%r6
        stwu  %r6,8(%r3)
        bdnz  0b
 /* Return the borrow. */
 1:     subfe %r3,%r3,%r3
        neg   %r3,%r3
        blr
 END(_gcry_mpih_sub_n)
 #endif
diff --git a/mpi/powerpc32/syntax.h b/mpi/powerpc32/syntax.h
index 5d4af9f0..5f4e6cf6 100644
--- a/mpi/powerpc32/syntax.h
+++ b/mpi/powerpc32/syntax.h
@@ -1,75 +1,76 @@
 /* gmp2-2.0.2-ppc/mpn/powerpc-linux/syntax.h   Tue Oct	6 19:27:01 1998 */
 /* From glibc's sysdeps/unix/sysv/linux/powerpc/sysdep.h */
 
 /* Copyright (C) 1992, 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.	*/
+   see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: LGPL-2.0-or-later
+*/
 
 
 #define USE_PPC_PATCHES 1
 
 /* This seems to always be the case on PPC.  */
 #define ALIGNARG(log2) log2
 /* For ELF we need the `.type' directive to make shared libs work right.  */
 #define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
 #define ASM_GLOBAL_DIRECTIVE   .globl
 
 #ifdef __STDC__
 #define C_LABEL(name) C_SYMBOL_NAME(name)##:
 #else
 #define C_LABEL(name) C_SYMBOL_NAME(name)/**/:
 #endif
 
 #ifdef __STDC__
 #define L(body) .L##body
 #else
 #define L(body) .L/**/body
 #endif
 
 /* No profiling of gmp's assembly for now... */
 #define CALL_MCOUNT /* no profiling */
 
 #define        ENTRY(name)				    \
   ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);		    \
   ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)	    \
   .align ALIGNARG(2);					    \
   C_LABEL(name) 					    \
   CALL_MCOUNT
 
 #define EALIGN_W_0  /* No words to insert.  */
 #define EALIGN_W_1  nop
 #define EALIGN_W_2  nop;nop
 #define EALIGN_W_3  nop;nop;nop
 #define EALIGN_W_4  EALIGN_W_3;nop
 #define EALIGN_W_5  EALIGN_W_4;nop
 #define EALIGN_W_6  EALIGN_W_5;nop
 #define EALIGN_W_7  EALIGN_W_6;nop
 
 /* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes
    past a 2^align boundary.  */
 #define EALIGN(name, alignt, words)			\
   ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);		\
   ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)	\
   .align ALIGNARG(alignt);				\
   EALIGN_W_##words;					\
   C_LABEL(name)
 
 #undef END
 #define END(name)		     \
   ASM_SIZE_DIRECTIVE(name)
 
diff --git a/mpi/sparc32/mpih-add1.S b/mpi/sparc32/mpih-add1.S
index 61a80ca3..d3488f51 100644
--- a/mpi/sparc32/mpih-add1.S
+++ b/mpi/sparc32/mpih-add1.S
@@ -1,239 +1,239 @@
 /* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
  *		   sum in a third limb vector.
  *
  *      Copyright (C) 1995, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 
 /*******************
  *  mpi_limb_t
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,
  *		   mpi_ptr_t s1_ptr,
  *		   mpi_ptr_t s2_ptr,
  *		   mpi_size_t size)
  */
 
 ! INPUT PARAMETERS
 #define res_ptr %o0
 #define s1_ptr	%o1
 #define s2_ptr	%o2
 #define size	%o3
 
 #include "sysdep.h"
 
 	.text
 	.align	4
 	.global C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n):
 	xor	s2_ptr,res_ptr,%g1
 	andcc	%g1,4,%g0
 	bne	L1			! branch if alignment differs
 	nop
 ! **  V1a  **
 L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
 	be	L_v1			! if no, branch
 	nop
 /* Add least significant limb separately to align res_ptr and s2_ptr */
 	ld	[s1_ptr],%g4
 	add	s1_ptr,4,s1_ptr
 	ld	[s2_ptr],%g2
 	add	s2_ptr,4,s2_ptr
 	add	size,-1,size
 	addcc	%g4,%g2,%o4
 	st	%o4,[res_ptr]
 	add	res_ptr,4,res_ptr
 L_v1:	addx	%g0,%g0,%o4		! save cy in register
 	cmp	size,2			! if size < 2 ...
 	bl	Lend2			! ... branch to tail code
 	subcc	%g0,%o4,%g0		! restore cy
 
 	ld	[s1_ptr+0],%g4
 	addcc	size,-10,size
 	ld	[s1_ptr+4],%g1
 	ldd	[s2_ptr+0],%g2
 	blt	Lfin1
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
 Loop1:	addxcc	%g4,%g2,%o4
 	ld	[s1_ptr+8],%g4
 	addxcc	%g1,%g3,%o5
 	ld	[s1_ptr+12],%g1
 	ldd	[s2_ptr+8],%g2
 	std	%o4,[res_ptr+0]
 	addxcc	%g4,%g2,%o4
 	ld	[s1_ptr+16],%g4
 	addxcc	%g1,%g3,%o5
 	ld	[s1_ptr+20],%g1
 	ldd	[s2_ptr+16],%g2
 	std	%o4,[res_ptr+8]
 	addxcc	%g4,%g2,%o4
 	ld	[s1_ptr+24],%g4
 	addxcc	%g1,%g3,%o5
 	ld	[s1_ptr+28],%g1
 	ldd	[s2_ptr+24],%g2
 	std	%o4,[res_ptr+16]
 	addxcc	%g4,%g2,%o4
 	ld	[s1_ptr+32],%g4
 	addxcc	%g1,%g3,%o5
 	ld	[s1_ptr+36],%g1
 	ldd	[s2_ptr+32],%g2
 	std	%o4,[res_ptr+24]
 	addx	%g0,%g0,%o4		! save cy in register
 	addcc	size,-8,size
 	add	s1_ptr,32,s1_ptr
 	add	s2_ptr,32,s2_ptr
 	add	res_ptr,32,res_ptr
 	bge	Loop1
 	subcc	%g0,%o4,%g0		! restore cy
 
 Lfin1:	addcc	size,8-2,size
 	blt	Lend1
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 2 limbs until less than 2 limbs remain */
 Loope1: addxcc	%g4,%g2,%o4
 	ld	[s1_ptr+8],%g4
 	addxcc	%g1,%g3,%o5
 	ld	[s1_ptr+12],%g1
 	ldd	[s2_ptr+8],%g2
 	std	%o4,[res_ptr+0]
 	addx	%g0,%g0,%o4		! save cy in register
 	addcc	size,-2,size
 	add	s1_ptr,8,s1_ptr
 	add	s2_ptr,8,s2_ptr
 	add	res_ptr,8,res_ptr
 	bge	Loope1
 	subcc	%g0,%o4,%g0		! restore cy
 Lend1:	addxcc	%g4,%g2,%o4
 	addxcc	%g1,%g3,%o5
 	std	%o4,[res_ptr+0]
 	addx	%g0,%g0,%o4		! save cy in register
 
 	andcc	size,1,%g0
 	be	Lret1
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
 	ld	[s1_ptr+8],%g4
 	ld	[s2_ptr+8],%g2
 	addxcc	%g4,%g2,%o4
 	st	%o4,[res_ptr+8]
 
 Lret1:	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 
 L1:	xor	s1_ptr,res_ptr,%g1
 	andcc	%g1,4,%g0
 	bne	L2
 	nop
 ! **  V1b  **
 	mov	s2_ptr,%g1
 	mov	s1_ptr,s2_ptr
 	b	L0
 	mov	%g1,s1_ptr
 
 ! **  V2  **
 /* If we come here, the alignment of s1_ptr and res_ptr as well as the
    alignment of s2_ptr and res_ptr differ.  Since there are only two ways
    things can be aligned (that we care about) we now know that the alignment
    of s1_ptr and s2_ptr are the same.  */
 
 L2:	cmp	size,1
 	be	Ljone
 	nop
 	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
 	be	L_v2			! if no, branch
 	nop
 /* Add least significant limb separately to align s1_ptr and s2_ptr */
 	ld	[s1_ptr],%g4
 	add	s1_ptr,4,s1_ptr
 	ld	[s2_ptr],%g2
 	add	s2_ptr,4,s2_ptr
 	add	size,-1,size
 	addcc	%g4,%g2,%o4
 	st	%o4,[res_ptr]
 	add	res_ptr,4,res_ptr
 
 L_v2:	addx	%g0,%g0,%o4		! save cy in register
 	addcc	size,-8,size
 	blt	Lfin2
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add blocks of 8 limbs until less than 8 limbs remain */
 Loop2:	ldd	[s1_ptr+0],%g2
 	ldd	[s2_ptr+0],%o4
 	addxcc	%g2,%o4,%g2
 	st	%g2,[res_ptr+0]
 	addxcc	%g3,%o5,%g3
 	st	%g3,[res_ptr+4]
 	ldd	[s1_ptr+8],%g2
 	ldd	[s2_ptr+8],%o4
 	addxcc	%g2,%o4,%g2
 	st	%g2,[res_ptr+8]
 	addxcc	%g3,%o5,%g3
 	st	%g3,[res_ptr+12]
 	ldd	[s1_ptr+16],%g2
 	ldd	[s2_ptr+16],%o4
 	addxcc	%g2,%o4,%g2
 	st	%g2,[res_ptr+16]
 	addxcc	%g3,%o5,%g3
 	st	%g3,[res_ptr+20]
 	ldd	[s1_ptr+24],%g2
 	ldd	[s2_ptr+24],%o4
 	addxcc	%g2,%o4,%g2
 	st	%g2,[res_ptr+24]
 	addxcc	%g3,%o5,%g3
 	st	%g3,[res_ptr+28]
 	addx	%g0,%g0,%o4		! save cy in register
 	addcc	size,-8,size
 	add	s1_ptr,32,s1_ptr
 	add	s2_ptr,32,s2_ptr
 	add	res_ptr,32,res_ptr
 	bge	Loop2
 	subcc	%g0,%o4,%g0		! restore cy
 
 Lfin2:	addcc	size,8-2,size
 	blt	Lend2
 	subcc	%g0,%o4,%g0		! restore cy
 Loope2: ldd	[s1_ptr+0],%g2
 	ldd	[s2_ptr+0],%o4
 	addxcc	%g2,%o4,%g2
 	st	%g2,[res_ptr+0]
 	addxcc	%g3,%o5,%g3
 	st	%g3,[res_ptr+4]
 	addx	%g0,%g0,%o4		! save cy in register
 	addcc	size,-2,size
 	add	s1_ptr,8,s1_ptr
 	add	s2_ptr,8,s2_ptr
 	add	res_ptr,8,res_ptr
 	bge	Loope2
 	subcc	%g0,%o4,%g0		! restore cy
 Lend2:	andcc	size,1,%g0
 	be	Lret2
 	subcc	%g0,%o4,%g0		! restore cy
 /* Add last limb */
 Ljone:	ld	[s1_ptr],%g4
 	ld	[s2_ptr],%g2
 	addxcc	%g4,%g2,%o4
 	st	%o4,[res_ptr]
 
 Lret2:	retl
 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 
 
 
diff --git a/mpi/sparc32/mpih-lshift.S b/mpi/sparc32/mpih-lshift.S
index 3422ab04..4d544ede 100644
--- a/mpi/sparc32/mpih-lshift.S
+++ b/mpi/sparc32/mpih-lshift.S
@@ -1,97 +1,97 @@
 /* sparc lshift
  *
  *      Copyright (C) 1995, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 
 ! INPUT PARAMETERS
 ! res_ptr	%o0
 ! src_ptr	%o1
 ! size		%o2
 ! cnt		%o3
 
 #include "sysdep.h"
 
 	.text
 	.align	4
 	.global C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift):
 	sll	%o2,2,%g1
 	add	%o1,%g1,%o1	! make %o1 point at end of src
 	ld	[%o1-4],%g2	! load first limb
 	sub	%g0,%o3,%o5	! negate shift count
 	add	%o0,%g1,%o0	! make %o0 point at end of res
 	add	%o2,-1,%o2
 	andcc	%o2,4-1,%g4	! number of limbs in first loop
 	srl	%g2,%o5,%g1	! compute function result
 	be	L0		! if multiple of 4 limbs, skip first loop
 	st	%g1,[%sp+80]
 
 	sub	%o2,%g4,%o2	! adjust count for main loop
 
 Loop0:	ld	[%o1-8],%g3
 	add	%o0,-4,%o0
 	add	%o1,-4,%o1
 	addcc	%g4,-1,%g4
 	sll	%g2,%o3,%o4
 	srl	%g3,%o5,%g1
 	mov	%g3,%g2
 	or	%o4,%g1,%o4
 	bne	Loop0
 	 st	%o4,[%o0+0]
 
 L0:	tst	%o2
 	be	Lend
 	 nop
 
 Loop:	ld	[%o1-8],%g3
 	add	%o0,-16,%o0
 	addcc	%o2,-4,%o2
 	sll	%g2,%o3,%o4
 	srl	%g3,%o5,%g1
 
 	ld	[%o1-12],%g2
 	sll	%g3,%o3,%g4
 	or	%o4,%g1,%o4
 	st	%o4,[%o0+12]
 	srl	%g2,%o5,%g1
 
 	ld	[%o1-16],%g3
 	sll	%g2,%o3,%o4
 	or	%g4,%g1,%g4
 	st	%g4,[%o0+8]
 	srl	%g3,%o5,%g1
 
 	ld	[%o1-20],%g2
 	sll	%g3,%o3,%g4
 	or	%o4,%g1,%o4
 	st	%o4,[%o0+4]
 	srl	%g2,%o5,%g1
 
 	add	%o1,-16,%o1
 	or	%g4,%g1,%g4
 	bne	Loop
 	 st	%g4,[%o0+0]
 
 Lend:	sll	%g2,%o3,%g2
 	st	%g2,[%o0-4]
 	retl
 	ld	[%sp+80],%o0
 
diff --git a/mpi/sparc32/mpih-rshift.S b/mpi/sparc32/mpih-rshift.S
index cd3db41d..a8552d79 100644
--- a/mpi/sparc32/mpih-rshift.S
+++ b/mpi/sparc32/mpih-rshift.S
@@ -1,93 +1,93 @@
 /* sparc rshift
  *
  *      Copyright (C) 1995, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 ! INPUT PARAMETERS
 ! res_ptr	%o0
 ! src_ptr	%o1
 ! size		%o2
 ! cnt		%o3
 
 #include "sysdep.h"
 
 	.text
 	.align	4
 	.global C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift):
 	ld	[%o1],%g2	! load first limb
 	sub	%g0,%o3,%o5	! negate shift count
 	add	%o2,-1,%o2
 	andcc	%o2,4-1,%g4	! number of limbs in first loop
 	sll	%g2,%o5,%g1	! compute function result
 	be	L0		! if multiple of 4 limbs, skip first loop
 	st	%g1,[%sp+80]
 
 	sub	%o2,%g4,%o2	! adjust count for main loop
 
 Loop0:	ld	[%o1+4],%g3
 	add	%o0,4,%o0
 	add	%o1,4,%o1
 	addcc	%g4,-1,%g4
 	srl	%g2,%o3,%o4
 	sll	%g3,%o5,%g1
 	mov	%g3,%g2
 	or	%o4,%g1,%o4
 	bne	Loop0
 	 st	%o4,[%o0-4]
 
 L0:	tst	%o2
 	be	Lend
 	 nop
 
 Loop:	ld	[%o1+4],%g3
 	add	%o0,16,%o0
 	addcc	%o2,-4,%o2
 	srl	%g2,%o3,%o4
 	sll	%g3,%o5,%g1
 
 	ld	[%o1+8],%g2
 	srl	%g3,%o3,%g4
 	or	%o4,%g1,%o4
 	st	%o4,[%o0-16]
 	sll	%g2,%o5,%g1
 
 	ld	[%o1+12],%g3
 	srl	%g2,%o3,%o4
 	or	%g4,%g1,%g4
 	st	%g4,[%o0-12]
 	sll	%g3,%o5,%g1
 
 	ld	[%o1+16],%g2
 	srl	%g3,%o3,%g4
 	or	%o4,%g1,%o4
 	st	%o4,[%o0-8]
 	sll	%g2,%o5,%g1
 
 	add	%o1,16,%o1
 	or	%g4,%g1,%g4
 	bne	Loop
 	 st	%g4,[%o0-4]
 
 Lend:	srl	%g2,%o3,%g2
 	st	%g2,[%o0-0]
 	retl
 	ld	[%sp+80],%o0
 
diff --git a/mpi/sparc32/udiv.S b/mpi/sparc32/udiv.S
index 006b5c12..04841fff 100644
--- a/mpi/sparc32/udiv.S
+++ b/mpi/sparc32/udiv.S
@@ -1,195 +1,195 @@
 /* SPARC v7 __udiv_qrnnd division support, used from longlong.h.
  *           This is for v7 CPUs without a floating-point unit.
  *
  *      Copyright (C) 1993, 1994, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 ! INPUT PARAMETERS
 ! rem_ptr	o0
 ! n1		o1
 ! n0		o2
 ! d		o3
 
 #include "sysdep.h"
 
 	.text
 	.align 4
 	.global C_SYMBOL_NAME(__udiv_qrnnd)
 C_SYMBOL_NAME(__udiv_qrnnd):
 	tst	%o3
 	bneg	Largedivisor
 	mov	8,%g1
 
 	b	Lp1
 	addxcc	%o2,%o2,%o2
 
 Lplop:	bcc	Ln1
 	addxcc	%o2,%o2,%o2
 Lp1:	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
 	bcc	Ln2
 	addxcc	%o2,%o2,%o2
 Lp2:	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
 	bcc	Ln3
 	addxcc	%o2,%o2,%o2
 Lp3:	addx	%o1,%o1,%o1
 	subcc	%o1,%o3,%o4
 	bcc	Ln4
 	addxcc	%o2,%o2,%o2
 Lp4:	addx	%o1,%o1,%o1
 	addcc	%g1,-1,%g1
 	bne	Lplop
 	subcc	%o1,%o3,%o4
 	bcc	Ln5
 	addxcc	%o2,%o2,%o2
 Lp5:	st	%o1,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
 Lnlop:	bcc	Lp1
 	addxcc	%o2,%o2,%o2
 Ln1:	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
 	bcc	Lp2
 	addxcc	%o2,%o2,%o2
 Ln2:	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
 	bcc	Lp3
 	addxcc	%o2,%o2,%o2
 Ln3:	addx	%o4,%o4,%o4
 	subcc	%o4,%o3,%o1
 	bcc	Lp4
 	addxcc	%o2,%o2,%o2
 Ln4:	addx	%o4,%o4,%o4
 	addcc	%g1,-1,%g1
 	bne	Lnlop
 	subcc	%o4,%o3,%o1
 	bcc	Lp5
 	addxcc	%o2,%o2,%o2
 Ln5:	st	%o4,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
 Largedivisor:
 	and	%o2,1,%o5	! %o5 = n0 & 1
 
 	srl	%o2,1,%o2
 	sll	%o1,31,%g2
 	or	%g2,%o2,%o2	! %o2 = lo(n1n0 >> 1)
 	srl	%o1,1,%o1	! %o1 = hi(n1n0 >> 1)
 
 	and	%o3,1,%g2
 	srl	%o3,1,%g3	! %g3 = floor(d / 2)
 	add	%g3,%g2,%g3	! %g3 = ceil(d / 2)
 
 	b	LLp1
 	addxcc	%o2,%o2,%o2
 
 LLplop: bcc	LLn1
 	addxcc	%o2,%o2,%o2
 LLp1:	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
 	bcc	LLn2
 	addxcc	%o2,%o2,%o2
 LLp2:	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
 	bcc	LLn3
 	addxcc	%o2,%o2,%o2
 LLp3:	addx	%o1,%o1,%o1
 	subcc	%o1,%g3,%o4
 	bcc	LLn4
 	addxcc	%o2,%o2,%o2
 LLp4:	addx	%o1,%o1,%o1
 	addcc	%g1,-1,%g1
 	bne	LLplop
 	subcc	%o1,%g3,%o4
 	bcc	LLn5
 	addxcc	%o2,%o2,%o2
 LLp5:	add	%o1,%o1,%o1	! << 1
 	tst	%g2
 	bne	Oddp
 	add	%o5,%o1,%o1
 	st	%o1,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
 LLnlop: bcc	LLp1
 	addxcc	%o2,%o2,%o2
 LLn1:	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
 	bcc	LLp2
 	addxcc	%o2,%o2,%o2
 LLn2:	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
 	bcc	LLp3
 	addxcc	%o2,%o2,%o2
 LLn3:	addx	%o4,%o4,%o4
 	subcc	%o4,%g3,%o1
 	bcc	LLp4
 	addxcc	%o2,%o2,%o2
 LLn4:	addx	%o4,%o4,%o4
 	addcc	%g1,-1,%g1
 	bne	LLnlop
 	subcc	%o4,%g3,%o1
 	bcc	LLp5
 	addxcc	%o2,%o2,%o2
 LLn5:	add	%o4,%o4,%o4	! << 1
 	tst	%g2
 	bne	Oddn
 	add	%o5,%o4,%o4
 	st	%o4,[%o0]
 	retl
 	xnor	%g0,%o2,%o0
 
 Oddp:	xnor	%g0,%o2,%o2
 	! q' in %o2. r' in %o1
 	addcc	%o1,%o2,%o1
 	bcc	LLp6
 	addx	%o2,0,%o2
 	sub	%o1,%o3,%o1
 LLp6:	subcc	%o1,%o3,%g0
 	bcs	LLp7
 	subx	%o2,-1,%o2
 	sub	%o1,%o3,%o1
 LLp7:	st	%o1,[%o0]
 	retl
 	mov	%o2,%o0
 
 Oddn:	xnor	%g0,%o2,%o2
 	! q' in %o2. r' in %o4
 	addcc	%o4,%o2,%o4
 	bcc	LLn6
 	addx	%o2,0,%o2
 	sub	%o4,%o3,%o4
 LLn6:	subcc	%o4,%o3,%g0
 	bcs	LLn7
 	subx	%o2,-1,%o2
 	sub	%o4,%o3,%o4
 LLn7:	st	%o4,[%o0]
 	retl
 	mov	%o2,%o0
diff --git a/mpi/sparc32v8/mpih-mul1.S b/mpi/sparc32v8/mpih-mul1.S
index 03fcddab..162416b7 100644
--- a/mpi/sparc32v8/mpih-mul1.S
+++ b/mpi/sparc32v8/mpih-mul1.S
@@ -1,109 +1,109 @@
 /* SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
  *                         store the product in a second limb vector.
  *
  *      Copyright (C) 1992, 1994, 1995, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 ! INPUT PARAMETERS
 ! res_ptr	o0
 ! s1_ptr	o1
 ! size		o2
 ! s2_limb	o3
 
 #include "sysdep.h"
 
 .text
 	.align	8
 	.global C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1):
 	sll	%o2,4,%g1
 	and	%g1,(4-1)<<4,%g1
 #if PIC
 	mov	%o7,%g4 		! Save return address register
 	call	1f
 	add	%o7,LL-1f,%g3
 1:	mov	%g4,%o7 		! Restore return address register
 #else
 	sethi	%hi(LL),%g3
 	or	%g3,%lo(LL),%g3
 #endif
 	jmp	%g3+%g1
 	ld	[%o1+0],%o4	! 1
 LL:
 LL00:	add	%o0,-4,%o0
 	add	%o1,-4,%o1
 	b	Loop00		/* 4, 8, 12, ... */
 	orcc	%g0,%g0,%g2
 LL01:	b	Loop01		/* 1, 5, 9, ... */
 	orcc	%g0,%g0,%g2
 	nop
 	nop
 LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
 	add	%o1,4,%o1
 	b	Loop10
 	orcc	%g0,%g0,%g2
 	nop
 LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
 	add	%o1,-8,%o1
 	b	Loop11
 	orcc	%g0,%g0,%g2
 
 Loop:	addcc	%g3,%g2,%g3	! 1
 	ld	[%o1+4],%o4	! 2
 	st	%g3,[%o0+0]	! 1
 	rd	%y,%g2		! 1
 Loop00: umul	%o4,%o3,%g3	! 2
 	addxcc	%g3,%g2,%g3	! 2
 	ld	[%o1+8],%o4	! 3
 	st	%g3,[%o0+4]	! 2
 	rd	%y,%g2		! 2
 Loop11: umul	%o4,%o3,%g3	! 3
 	addxcc	%g3,%g2,%g3	! 3
 	ld	[%o1+12],%o4	! 4
 	add	%o1,16,%o1
 	st	%g3,[%o0+8]	! 3
 	rd	%y,%g2		! 3
 Loop10: umul	%o4,%o3,%g3	! 4
 	addxcc	%g3,%g2,%g3	! 4
 	ld	[%o1+0],%o4	! 1
 	st	%g3,[%o0+12]	! 4
 	add	%o0,16,%o0
 	rd	%y,%g2		! 4
 	addx	%g0,%g2,%g2
 Loop01: addcc	%o2,-4,%o2
 	bg	Loop
 	umul	%o4,%o3,%g3	! 1
 
 	addcc	%g3,%g2,%g3	! 4
 	st	%g3,[%o0+0]	! 4
 	rd	%y,%g2		! 4
 
 	retl
 	addx	%g0,%g2,%o0
 
 
diff --git a/mpi/sparc32v8/mpih-mul2.S b/mpi/sparc32v8/mpih-mul2.S
index 6f5cc436..a9409836 100644
--- a/mpi/sparc32v8/mpih-mul2.S
+++ b/mpi/sparc32v8/mpih-mul2.S
@@ -1,132 +1,132 @@
 /* SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
  *                            add the result to a second limb vector.
  *
  *      Copyright (C) 1992, 1993, 1994, 1995, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 ! INPUT PARAMETERS
 ! res_ptr	o0
 ! s1_ptr	o1
 ! size		o2
 ! s2_limb	o3
 
 #include "sysdep.h"
 
 .text
 	.align 4
 	.global C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1):
 	orcc	%g0,%g0,%g2
 	ld	[%o1+0],%o4	! 1
 
 	sll	%o2,4,%g1
 	and	%g1,(4-1)<<4,%g1
 #if PIC
 	mov	%o7,%g4 		! Save return address register
 	call	1f
 	add	%o7,LL-1f,%g3
 1:	mov	%g4,%o7 		! Restore return address register
 #else
 	sethi	%hi(LL),%g3
 	or	%g3,%lo(LL),%g3
 #endif
 	jmp	%g3+%g1
 	nop
 LL:
 LL00:	add	%o0,-4,%o0
 	b	Loop00		/* 4, 8, 12, ... */
 	add	%o1,-4,%o1
 	nop
 LL01:	b	Loop01		/* 1, 5, 9, ... */
 	nop
 	nop
 	nop
 LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
 	b	Loop10
 	add	%o1,4,%o1
 	nop
 LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
 	b	Loop11
 	add	%o1,-8,%o1
 	nop
 
 1:	addcc	%g3,%g2,%g3	! 1
 	ld	[%o1+4],%o4	! 2
 	rd	%y,%g2		! 1
 	addx	%g0,%g2,%g2
 	ld	[%o0+0],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+0]	! 1
 Loop00: umul	%o4,%o3,%g3	! 2
 	ld	[%o0+4],%g1	! 2
 	addxcc	%g3,%g2,%g3	! 2
 	ld	[%o1+8],%o4	! 3
 	rd	%y,%g2		! 2
 	addx	%g0,%g2,%g2
 	nop
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+4]	! 2
 Loop11: umul	%o4,%o3,%g3	! 3
 	addxcc	%g3,%g2,%g3	! 3
 	ld	[%o1+12],%o4	! 4
 	rd	%y,%g2		! 3
 	add	%o1,16,%o1
 	addx	%g0,%g2,%g2
 	ld	[%o0+8],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+8]	! 3
 Loop10: umul	%o4,%o3,%g3	! 4
 	addxcc	%g3,%g2,%g3	! 4
 	ld	[%o1+0],%o4	! 1
 	rd	%y,%g2		! 4
 	addx	%g0,%g2,%g2
 	ld	[%o0+12],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+12]	! 4
 	add	%o0,16,%o0
 	addx	%g0,%g2,%g2
 Loop01: addcc	%o2,-4,%o2
 	bg	1b
 	umul	%o4,%o3,%g3	! 1
 
 	addcc	%g3,%g2,%g3	! 4
 	rd	%y,%g2		! 4
 	addx	%g0,%g2,%g2
 	ld	[%o0+0],%g1	! 2
 	addcc	%g1,%g3,%g3
 	st	%g3,[%o0+0]	! 4
 	addx	%g0,%g2,%o0
 
 	retl
 	 nop
 
 
 !	umul, ld, addxcc, rd, st
 
 !	umul, ld, addxcc, rd, ld, addcc, st, addx
 
diff --git a/mpi/sparc32v8/mpih-mul3.S b/mpi/sparc32v8/mpih-mul3.S
index 93bb1943..46748496 100644
--- a/mpi/sparc32v8/mpih-mul3.S
+++ b/mpi/sparc32v8/mpih-mul3.S
@@ -1,67 +1,67 @@
 /* SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
  *                            subtract the result from a second limb vector.
  *
  *      Copyright (C) 1992, 1993, 1994, 1998, 
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
         
 ! INPUT PARAMETERS
 ! res_ptr	o0
 ! s1_ptr	o1
 ! size		o2
 ! s2_limb	o3
 
 #include "sysdep.h"
 
 .text
 	.align 4
 	.global C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1):
 	sub	%g0,%o2,%o2		! negate ...
 	sll	%o2,2,%o2		! ... and scale size
 	sub	%o1,%o2,%o1		! o1 is offset s1_ptr
 	sub	%o0,%o2,%g1		! g1 is offset res_ptr
 
 	mov	0,%o0			! clear cy_limb
 
 Loop:	ld	[%o1+%o2],%o4
 	ld	[%g1+%o2],%g2
 	umul	%o4,%o3,%o5
 	rd	%y,%g3
 	addcc	%o5,%o0,%o5
 	addx	%g3,0,%o0
 	subcc	%g2,%o5,%g2
 	addx	%o0,0,%o0
 	st	%g2,[%g1+%o2]
 
 	addcc	%o2,4,%o2
 	bne	Loop
 	 nop
 
 	retl
 	 nop
 
 
diff --git a/mpi/supersparc/udiv.S b/mpi/supersparc/udiv.S
index 79e506a1..2582e358 100644
--- a/mpi/supersparc/udiv.S
+++ b/mpi/supersparc/udiv.S
@@ -1,118 +1,118 @@
 /* SuperSPARC __udiv_qrnnd division support, used from longlong.h.
  *            This is for SuperSPARC only, to compensate for its
  *            semi-functional udiv instruction.
  *
  *      Copyright (C) 1993, 1994, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
  *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 
 
 ! INPUT PARAMETERS
 ! rem_ptr	i0
 ! n1		i1
 ! n0		i2
 ! d		i3
 
 #include "sysdep.h"
 #undef ret	/* Kludge for glibc */
 
 	.text
 	.align	8
 LC0:	.double 0r4294967296
 LC1:	.double 0r2147483648
 
 	.align	4
 	.global C_SYMBOL_NAME(__udiv_qrnnd)
 C_SYMBOL_NAME(__udiv_qrnnd):
 	!#PROLOGUE# 0
 	save	%sp,-104,%sp
 	!#PROLOGUE# 1
 	st	%i1,[%fp-8]
 	ld	[%fp-8],%f10
 	sethi	%hi(LC0),%o7
 	fitod	%f10,%f4
 	ldd	[%o7+%lo(LC0)],%f8
 	cmp	%i1,0
 	bge	L248
 	mov	%i0,%i5
 	faddd	%f4,%f8,%f4
 L248:
 	st	%i2,[%fp-8]
 	ld	[%fp-8],%f10
 	fmuld	%f4,%f8,%f6
 	cmp	%i2,0
 	bge	L249
 	fitod	%f10,%f2
 	faddd	%f2,%f8,%f2
 L249:
 	st	%i3,[%fp-8]
 	faddd	%f6,%f2,%f2
 	ld	[%fp-8],%f10
 	cmp	%i3,0
 	bge	L250
 	fitod	%f10,%f4
 	faddd	%f4,%f8,%f4
 L250:
 	fdivd	%f2,%f4,%f2
 	sethi	%hi(LC1),%o7
 	ldd	[%o7+%lo(LC1)],%f4
 	fcmped	%f2,%f4
 	nop
 	fbge,a	L251
 	fsubd	%f2,%f4,%f2
 	fdtoi	%f2,%f2
 	st	%f2,[%fp-8]
 	b	L252
 	ld	[%fp-8],%i4
 L251:
 	fdtoi	%f2,%f2
 	st	%f2,[%fp-8]
 	ld	[%fp-8],%i4
 	sethi	%hi(-2147483648),%g2
 	xor	%i4,%g2,%i4
 L252:
 	umul	%i3,%i4,%g3
 	rd	%y,%i0
 	subcc	%i2,%g3,%o7
 	subxcc	%i1,%i0,%g0
 	be	L253
 	cmp	%o7,%i3
 
 	add	%i4,-1,%i0
 	add	%o7,%i3,%o7
 	st	%o7,[%i5]
 	ret
 	restore
 L253:
 	blu	L246
 	mov	%i4,%i0
 	add	%i4,1,%i0
 	sub	%o7,%i3,%o7
 L246:
 	st	%o7,[%i5]
 	ret
 	restore
 
diff --git a/random/random.h b/random/random.h
index eedf1d0f..dee1f9c6 100644
--- a/random/random.h
+++ b/random/random.h
@@ -1,69 +1,69 @@
 /* random.h - random functions
  *	Copyright (C) 1998, 2002, 2006 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 #ifndef G10_RANDOM_H
 #define G10_RANDOM_H
 
 #include "types.h"
 #include "../src/gcrypt-testapi.h"  /* struct gcry_drbg_test_vector */
 
 /*-- random.c --*/
 void _gcry_register_random_progress (void (*cb)(void *,const char*,int,int,int),
                                      void *cb_data );
 
 void _gcry_set_preferred_rng_type (int type);
 void _gcry_random_initialize (int full);
 void _gcry_random_close_fds (void);
 int  _gcry_get_rng_type (int ignore_fips_mode);
 void _gcry_random_dump_stats(void);
 void _gcry_secure_random_alloc(void);
 void _gcry_enable_quick_random_gen (void);
 int  _gcry_random_is_faked(void);
 void _gcry_set_random_seed_file (const char *name);
 void _gcry_update_random_seed_file (void);
 
 void _gcry_fast_random_poll( void );
 
 gcry_err_code_t _gcry_random_init_external_test (void **r_context,
                                                  unsigned int flags,
                                                  const void *key,
                                                  size_t keylen,
                                                  const void *seed,
                                                  size_t seedlen,
                                                  const void *dt,
                                                  size_t dtlen);
 gcry_err_code_t _gcry_random_run_external_test (void *context,
                                                 char *buffer, size_t buflen);
 void            _gcry_random_deinit_external_test (void *context);
 
 /*-- random-drbg.c --*/
 gpg_err_code_t _gcry_rngdrbg_reinit (const char *flagstr,
                                      gcry_buffer_t *pers, int npers);
 gpg_err_code_t _gcry_rngdrbg_cavs_test (struct gcry_drbg_test_vector *t,
                                         unsigned char *buf);
 gpg_err_code_t _gcry_rngdrbg_healthcheck_one (struct gcry_drbg_test_vector *t);
 
 /*-- rndegd.c --*/
 gpg_error_t _gcry_rndegd_set_socket_name (const char *name);
 
 /*-- rndjent.c --*/
 unsigned int _gcry_rndjent_get_version (int *r_active);
 
 
 #endif /*G10_RANDOM_H*/
diff --git a/random/rndegd.c b/random/rndegd.c
index b87115f2..26456956 100644
--- a/random/rndegd.c
+++ b/random/rndegd.c
@@ -1,290 +1,290 @@
 /* rndegd.c  -	interface to the EGD
  *	Copyright (C) 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
 #include "rand-internal.h"
 
 /* Fallback for systems whose headers do not provide offsetof: yields
    the byte offset of MEMBER inside TYPE by taking the address of the
    member through a null TYPE pointer.  Only defined when no offsetof
    macro is already in scope.  */
 #ifndef offsetof
 #define offsetof(type, member) ((size_t) &((type *)0)->member)
 #endif
 
 /* File descriptor of the current EGD connection, or -1 if there is
    none.  Set by _gcry_rndegd_connect_socket and read by
    _gcry_rndegd_gather_random.  */
 static int egd_socket = -1;
 
 /* Allocated name of the socket if supplied at runtime (via
    _gcry_rndegd_set_socket_name); NULL otherwise.  */
 static char *user_socket_name;
 
 
 /* Build a newly allocated filename from FIRST_PART and, if it is not
    NULL, SECOND_PART (joined with a '/').  A leading "~/" in
    FIRST_PART is replaced by the value of the HOME environment
    variable when that variable is set.  The caller owns the returned
    string.
  */
 static char *
 my_make_filename (const char *first_part, const char *second_part)
 {
   const char *homedir = NULL;
   size_t total;
   char *result, *end;
 
   /* Only a "~/" prefix triggers the HOME lookup.  */
   if (first_part[0] == '~' && first_part[1] == '/')
     homedir = getenv ("HOME");
 
   /* FIRST_PART plus terminator; the '~' that gets dropped on
      expansion simply leaves one spare byte.  */
   total = strlen (first_part) + 1;
   if (homedir && *homedir)
     total += strlen (homedir);
   if (second_part)
     total += strlen (second_part) + 1;  /* +1 for the '/' separator.  */
 
   result = _gcry_xmalloc (total);
   if (homedir)
     {
       end = stpcpy (result, homedir);
       end = stpcpy (end, first_part + 1);  /* Skip the '~'.  */
     }
   else
     end = stpcpy (result, first_part);
 
   if (second_part)
     {
       end = stpcpy (end, "/");
       strcpy (end, second_part);
     }
 
   return result;
 }
 
 
 /* Write all NBYTES bytes of BUF to FD, resuming after partial writes
    and retrying when write is interrupted (EINTR).  Returns 0 once
    everything has been written and -1 on any other write error.  */
 static int
 do_write( int fd, void *buf, size_t nbytes )
 {
   const char *cursor = buf;
   size_t remaining;
   int rc;
 
   for (remaining = nbytes; remaining > 0; )
     {
       rc = write (fd, cursor, remaining);
       if (rc >= 0)
         {
           /* Partial write: advance past what was accepted.  */
           cursor += rc;
           remaining -= rc;
         }
       else if (errno != EINTR)
         return -1;
     }
   return 0;
 }
 
 /* Read NBYTES bytes from FD into BUF, retrying when read is
    interrupted (EINTR) and continuing after short reads until the
    full amount has arrived.
 
    Returns the number of bytes stored in BUF.  If read fails after
    some data was already received that partial count is returned; if
    it fails before any data arrived, or if end-of-file is hit, -1 is
    returned.  */
 static int
 do_read( int fd, void *buf, size_t nbytes )
 {
   int n, nread = 0;
 
   do
     {
       do
         {
           n = read(fd, (char*)buf + nread, nbytes );
         }
       while( n == -1 && errno == EINTR );
       if( n == -1)
         return nread? nread:-1;
       if( n == 0)
         return -1;
       nread += n;
       nbytes -= n;
     }
   /* NBYTES is the amount still outstanding.  Comparing it against
      NREAD (as was done before) stopped the loop roughly half way
      after a short read and left unread reply bytes in the stream.  */
   while( nbytes > 0 );
   return nread;
 }
 
 
 /* Record NAME (after "~/" expansion by my_make_filename) as the EGD
    socket name to be used by later connection attempts, replacing any
    previously set name.  Returns 0 on success or an ENAMETOOLONG
    based error code if the expanded name does not fit into the
    sun_path member of struct sockaddr_un.
 
    Note that this function is not thread-safe.  */
 gpg_error_t
 _gcry_rndegd_set_socket_name (const char *name)
 {
   char *newname;
   struct sockaddr_un addr;
 
   newname = my_make_filename (name, NULL);
   if (strlen (newname)+1 >= sizeof addr.sun_path)
     {
       xfree (newname);
       /* No system call failed here, so errno holds nothing related
          to this condition.  Set it explicitly - and only after
          xfree, which might modify it - so that the error code
          derived from it describes the actual problem.  */
       errno = ENAMETOOLONG;
       return gpg_error_from_syserror ();
     }
   xfree (user_socket_name);
   user_socket_name = newname;
   return 0;
 }
 
 
 /* Connect to the EGD and return the file descriptor.  Return -1 on
    error.  With NOFAIL set to true, silently fail and return the
    error, otherwise print an error message and die.
 
    Any connection already held in EGD_SOCKET is closed first.  The
    socket name is taken from USER_SOCKET_NAME if one was set at
    runtime, else from the compile time EGD_SOCKET_NAME if that is
    defined and not empty, else "~/.gnupg/entropy" is used.  On
    success the new descriptor is also stored in EGD_SOCKET.  */
 int
 _gcry_rndegd_connect_socket (int nofail)
 {
   int fd;
   const char *bname = NULL;
   char *name;
   struct sockaddr_un addr;
   int addr_len;
 
   if (egd_socket != -1)
     {
       close (egd_socket);
       egd_socket = -1;
     }
 
 #ifdef EGD_SOCKET_NAME
   bname = EGD_SOCKET_NAME;
 #endif
   if (user_socket_name)
     {
       name = _gcry_strdup (user_socket_name);
       if (!name)
         {
           if (!nofail)
             log_fatal ("error allocating memory in rndegd: %s\n",
                        strerror(errno) );
           return -1;
         }
     }
   else if ( !bname || !*bname )
     name = my_make_filename ("~/.gnupg", "entropy");
   else
     name = my_make_filename (bname, NULL);
 
   if (strlen(name)+1 >= sizeof addr.sun_path)
     {
       /* Honor NOFAIL here as well: the caller asked for an error
          return instead of process termination.  */
       if (!nofail)
         log_fatal ("EGD socketname is too long\n");
       xfree (name);
       return -1;
     }
 
   memset( &addr, 0, sizeof addr );
   addr.sun_family = AF_UNIX;
   strcpy( addr.sun_path, name );
   addr_len = (offsetof( struct sockaddr_un, sun_path )
               + strlen( addr.sun_path ));
 
   fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (fd == -1)
     {
       /* Do not fall through to connect/close with an invalid
          descriptor when NOFAIL is set; just report the failure.  */
       if (!nofail)
         log_fatal("can't create unix domain socket: %s\n", strerror(errno) );
     }
   else if (connect (fd, (struct sockaddr*)&addr, addr_len) == -1)
     {
       if (!nofail)
         log_fatal("can't connect to EGD socket `%s': %s\n",
                   name, strerror(errno) );
       close (fd);
       fd = -1;
     }
   xfree (name);
   if (fd != -1)
     egd_socket = fd;
   return fd;
 }
 
 /****************
  * Gather LENGTH bytes of entropy from the EGD and hand them to ADD,
  * passing ORIGIN along with each chunk.
  *
  * Returns 0 on every path that returns: immediately when LENGTH or
  * LEVEL is zero, otherwise once LENGTH bytes have been delivered.
  * A failed write goes to log_fatal (presumably that does not return
  * -- confirm); a failed read is logged and the exchange is restarted
  * on a fresh connection.
  *
  * Note: We always use the highest level.
  * To boost the performance we may want to add some
  * additional code for level 1
  *
  * Using a level of 0 should never block and better add nothing
  * to the pool.  So this is just a dummy for EGD.
  */
 int
 _gcry_rndegd_gather_random (void (*add)(const void*, size_t,
                                         enum random_origins),
                             enum random_origins origin,
                             size_t length, int level )
 {
   int fd = egd_socket;
   int n;
   /* Holds the 2 byte request and then the reply; a single request
      never asks for more than 255 bytes.  */
   byte buffer[256+2];
   int nbytes;
   int do_restart = 0;
 
   if( !length )
     return 0;
   if( !level )
     return 0;
 
  restart:
   /* Connect on first use, or reconnect after a read error.  The
      connect is done with NOFAIL cleared.  */
   if (fd == -1 || do_restart)
     fd = _gcry_rndegd_connect_socket (0);
 
   do_restart = 0;
 
   nbytes = length < 255? length : 255;
   /* First time we do it with a non blocking request */
   /* Request layout: command byte followed by the byte count.  */
   buffer[0] = 1; /* non blocking */
   buffer[1] = nbytes;
   if( do_write( fd, buffer, 2 ) == -1 )
     log_fatal("can't write to the EGD: %s\n", strerror(errno) );
   /* The reply to the non blocking request starts with one byte
      giving the number of data bytes that follow.  */
   n = do_read( fd, buffer, 1 );
   if( n == -1 )
     {
       log_error("read error on EGD: %s\n", strerror(errno));
       do_restart = 1;
       goto restart;
     }
   n = buffer[0];
   if( n )
     {
       n = do_read( fd, buffer, n );
       if( n == -1 )
         {
           log_error("read error on EGD: %s\n", strerror(errno));
           do_restart = 1;
           goto restart;
 	}
       (*add)( buffer, n, origin );
       length -= n;
     }
 
   /* The non blocking request did not deliver everything: tell the
      user and fall back to blocking requests for the remainder.  */
   if( length )
     {
       log_info (
       _("Please wait, entropy is being gathered. Do some work if it would\n"
         "keep you from getting bored, because it will improve the quality\n"
         "of the entropy.\n") );
     }
   while( length )
     {
       nbytes = length < 255? length : 255;
 
       buffer[0] = 2; /* blocking */
       buffer[1] = nbytes;
       if( do_write( fd, buffer, 2 ) == -1 )
         log_fatal("can't write to the EGD: %s\n", strerror(errno) );
       /* Unlike the non blocking case no count byte is read here; the
          data is read directly.  */
       n = do_read( fd, buffer, nbytes );
       if( n == -1 )
         {
           log_error("read error on EGD: %s\n", strerror(errno));
           do_restart = 1;
           goto restart;
 	}
       (*add)( buffer, n, origin );
       length -= n;
     }
   /* Clear the gathered bytes from the stack buffer before returning.  */
   memset(buffer, 0, sizeof(buffer) );
 
   return 0; /* success */
 }
diff --git a/src/cipher.h b/src/cipher.h
index 95ed43d7..3c48c2c5 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -1,225 +1,225 @@
 /* cipher.h
  *	Copyright (C) 1998, 2002, 2003, 2009 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 #ifndef G10_CIPHER_H
 #define G10_CIPHER_H
 
 #include "gcrypt-int.h"
 
 #define DBG_CIPHER _gcry_get_debug_flag( 1 )
 
 #include "../random/random.h"
 
 /* Public key flags.  Each flag occupies its own bit (bits 0 to 17),
    so several of them can be combined in one integer.
    NOTE(review): presumably these are the values kept in the FLAGS
    member of struct pk_encoding_ctx -- confirm against the users.  */
 #define PUBKEY_FLAG_NO_BLINDING    (1 << 0)
 #define PUBKEY_FLAG_RFC6979        (1 << 1)
 #define PUBKEY_FLAG_FIXEDLEN       (1 << 2)
 #define PUBKEY_FLAG_LEGACYRESULT   (1 << 3)
 #define PUBKEY_FLAG_RAW_FLAG       (1 << 4)
 #define PUBKEY_FLAG_TRANSIENT_KEY  (1 << 5)
 #define PUBKEY_FLAG_USE_X931       (1 << 6)
 #define PUBKEY_FLAG_USE_FIPS186    (1 << 7)
 #define PUBKEY_FLAG_USE_FIPS186_2  (1 << 8)
 #define PUBKEY_FLAG_PARAM          (1 << 9)
 #define PUBKEY_FLAG_COMP           (1 << 10)
 #define PUBKEY_FLAG_NOCOMP         (1 << 11)
 #define PUBKEY_FLAG_EDDSA          (1 << 12)
 #define PUBKEY_FLAG_GOST           (1 << 13)
 #define PUBKEY_FLAG_NO_KEYTEST     (1 << 14)
 #define PUBKEY_FLAG_DJB_TWEAK      (1 << 15)
 #define PUBKEY_FLAG_SM2            (1 << 16)
 #define PUBKEY_FLAG_PREHASH        (1 << 17)
 
 
 /* The kind of public key operation; stored in the OP member of
    struct pk_encoding_ctx.  */
 enum pk_operation
   {
     PUBKEY_OP_ENCRYPT,
     PUBKEY_OP_DECRYPT,
     PUBKEY_OP_SIGN,
     PUBKEY_OP_VERIFY
   };
 
 /* The encoding applied to the data of a public key operation; stored
    in the ENCODING member of struct pk_encoding_ctx.  */
 enum pk_encoding
   {
     PUBKEY_ENC_RAW,
     PUBKEY_ENC_PKCS1,
     PUBKEY_ENC_PKCS1_RAW,
     PUBKEY_ENC_OAEP,
     PUBKEY_ENC_PSS,
     PUBKEY_ENC_UNKNOWN
   };
 
 /* Context bundling the operation, the encoding and the encoding
    specific parameters of a public key operation.
    NOTE(review): how the individual members are consumed is not
    visible in this header -- confirm against the users of the
    struct.  */
 struct pk_encoding_ctx
 {
   enum pk_operation op;
   unsigned int nbits;   /* Presumably the key size in bits -- confirm.  */
 
   enum pk_encoding encoding;
   int flags;            /* Presumably PUBKEY_FLAG_* bits -- confirm.  */
 
   int hash_algo;
 
   /* for OAEP */
   unsigned char *label;
   size_t labellen;      /* Presumably the length of LABEL -- confirm.  */
 
   /* for PSS */
   size_t saltlen;
 
   /* Comparison callback with its argument; presumably VERIFY_ARG is
      what gets passed as OPAQUE -- confirm.  */
   int (* verify_cmp) (void *opaque, gcry_mpi_t tmp);
   void *verify_arg;
 };
 
 #define CIPHER_INFO_NO_WEAK_KEY    1
 
 #include "cipher-proto.h"
 
 /* The internal encryption modes.  Their values start at 0x10000;
    NOTE(review): presumably chosen to stay clear of the public
    GCRY_CIPHER_MODE_* values -- confirm against gcrypt.h.  */
 enum gcry_cipher_internal_modes
   {
     GCRY_CIPHER_MODE_INTERNAL = 0x10000,
     GCRY_CIPHER_MODE_CMAC     = 0x10000 + 1   /* Cipher-based MAC. */
   };
 
 
 /*-- cipher.c --*/
 gcry_err_code_t _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
 					    int algo, int mode,
 					    unsigned int flags);
 
 /*-- cipher-cmac.c --*/
 gcry_err_code_t _gcry_cipher_cmac_authenticate
 /*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
 gcry_err_code_t _gcry_cipher_cmac_get_tag
 /*           */ (gcry_cipher_hd_t c,
                  unsigned char *outtag, size_t taglen);
 gcry_err_code_t _gcry_cipher_cmac_check_tag
 /*           */ (gcry_cipher_hd_t c,
                  const unsigned char *intag, size_t taglen);
 gcry_err_code_t _gcry_cipher_cmac_set_subkeys
 /*           */ (gcry_cipher_hd_t c);
 
 /*-- sha1.c --*/
 void _gcry_sha1_hash_buffer (void *outbuf,
                              const void *buffer, size_t length);
 
 /*-- blake2.c --*/
 gcry_err_code_t blake2b_vl_hash (const void *in, size_t inlen,
                                  size_t outputlen, void *output);
 gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags,
 					   const unsigned char *key,
 					   size_t keylen, int algo);
 
 /*-- dsa.c --*/
 void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data);
 
 /*-- elgamal.c --*/
 void _gcry_register_pk_elg_progress (gcry_handler_progress_t cb,
                                      void *cb_data);
 
 
 /*-- ecc.c --*/
 void _gcry_register_pk_ecc_progress (gcry_handler_progress_t cbc,
                                      void *cb_data);
 
 
 /*-- primegen.c --*/
 void _gcry_register_primegen_progress (gcry_handler_progress_t cb,
                                        void *cb_data);
 
 /*-- pubkey.c --*/
 
 /* Declarations for the cipher specifications.  */
 extern gcry_cipher_spec_t _gcry_cipher_spec_blowfish;
 extern gcry_cipher_spec_t _gcry_cipher_spec_des;
 extern gcry_cipher_spec_t _gcry_cipher_spec_tripledes;
 extern gcry_cipher_spec_t _gcry_cipher_spec_arcfour;
 extern gcry_cipher_spec_t _gcry_cipher_spec_cast5;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aes;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aes192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aes256;
 extern gcry_cipher_spec_t _gcry_cipher_spec_twofish;
 extern gcry_cipher_spec_t _gcry_cipher_spec_twofish128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_serpent128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_serpent192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_serpent256;
 extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40;
 extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_seed;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia256;
 extern gcry_cipher_spec_t _gcry_cipher_spec_idea;
 extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20;
 extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12;
 extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147;
 extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh;
 extern gcry_cipher_spec_t _gcry_cipher_spec_chacha20;
 extern gcry_cipher_spec_t _gcry_cipher_spec_sm4;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aria128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aria192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_aria256;
 
 /* Declarations for the digest specifications.  */
 extern const gcry_md_spec_t _gcry_digest_spec_crc32;
 extern const gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510;
 extern const gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440;
 extern const gcry_md_spec_t _gcry_digest_spec_gost3411_94;
 extern const gcry_md_spec_t _gcry_digest_spec_gost3411_cp;
 extern const gcry_md_spec_t _gcry_digest_spec_stribog_256;
 extern const gcry_md_spec_t _gcry_digest_spec_stribog_512;
 extern const gcry_md_spec_t _gcry_digest_spec_md2;
 extern const gcry_md_spec_t _gcry_digest_spec_md4;
 extern const gcry_md_spec_t _gcry_digest_spec_md5;
 extern const gcry_md_spec_t _gcry_digest_spec_rmd160;
 extern const gcry_md_spec_t _gcry_digest_spec_sha1;
 extern const gcry_md_spec_t _gcry_digest_spec_sha224;
 extern const gcry_md_spec_t _gcry_digest_spec_sha256;
 extern const gcry_md_spec_t _gcry_digest_spec_sha384;
 extern const gcry_md_spec_t _gcry_digest_spec_sha512;
 extern const gcry_md_spec_t _gcry_digest_spec_sha512_224;
 extern const gcry_md_spec_t _gcry_digest_spec_sha512_256;
 extern const gcry_md_spec_t _gcry_digest_spec_sha3_224;
 extern const gcry_md_spec_t _gcry_digest_spec_sha3_256;
 extern const gcry_md_spec_t _gcry_digest_spec_sha3_512;
 extern const gcry_md_spec_t _gcry_digest_spec_sha3_384;
 extern const gcry_md_spec_t _gcry_digest_spec_shake128;
 extern const gcry_md_spec_t _gcry_digest_spec_shake256;
 extern const gcry_md_spec_t _gcry_digest_spec_tiger;
 extern const gcry_md_spec_t _gcry_digest_spec_tiger1;
 extern const gcry_md_spec_t _gcry_digest_spec_tiger2;
 extern const gcry_md_spec_t _gcry_digest_spec_whirlpool;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2b_512;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2b_384;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2b_256;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2b_160;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2s_256;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2s_224;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2s_160;
 extern const gcry_md_spec_t _gcry_digest_spec_blake2s_128;
 extern const gcry_md_spec_t _gcry_digest_spec_sm3;
 
 /* Declarations for the pubkey cipher specifications.  */
 extern gcry_pk_spec_t _gcry_pubkey_spec_rsa;
 extern gcry_pk_spec_t _gcry_pubkey_spec_elg;
 extern gcry_pk_spec_t _gcry_pubkey_spec_elg_e;
 extern gcry_pk_spec_t _gcry_pubkey_spec_dsa;
 extern gcry_pk_spec_t _gcry_pubkey_spec_ecc;
 
 
 #endif /*G10_CIPHER_H*/
diff --git a/src/libgcrypt.def b/src/libgcrypt.def
index a66511c8..8fc5767d 100644
--- a/src/libgcrypt.def
+++ b/src/libgcrypt.def
@@ -1,304 +1,304 @@
 ;; libgcrypt.def -  Exported symbols for W32
 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
 ;;
 ;; This file is part of Libgcrypt.
 ;;
 ;; Libgcrypt is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU Lesser General Public License as
 ;; published by the Free Software Foundation; either version 2.1 of
 ;; the License, or (at your option) any later version.
 ;;
 ;; Libgcrypt is distributed in the hope that it will be useful,
 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ;; GNU Lesser General Public License for more details.
 ;;
 ;; You should have received a copy of the GNU Lesser General Public
-;; License along with this program; if not, write to the Free Software
-;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+;; License along with this program; if not, see <https://www.gnu.org/licenses/>.
+;; SPDX-License-Identifier: LGPL-2.1-or-later
 ;;
 
 ;; Note: This file should be updated manually and the ordinals shall
 ;; never be changed.  Also check libgcrypt.vers and visibility.h.
 
 
 EXPORTS
       gcry_check_version  @1
       gcry_control  @2
 
       gcry_malloc  @3
       gcry_calloc  @4
       gcry_malloc_secure  @5
       gcry_calloc_secure  @6
       gcry_realloc  @7
       gcry_strdup  @8
       gcry_xmalloc  @9
       gcry_xcalloc  @10
       gcry_xmalloc_secure  @11
       gcry_xcalloc_secure  @12
       gcry_xrealloc  @13
       gcry_xstrdup  @14
       gcry_is_secure  @15
       gcry_free  @16
 
       gcry_set_progress_handler  @17
       gcry_set_allocation_handler  @18
       gcry_set_outofcore_handler  @19
       gcry_set_fatalerror_handler  @20
       gcry_set_log_handler  @21
       gcry_set_gettext_handler  @22
 
       gcry_strerror  @23
       gcry_strsource  @24
       gcry_err_code_from_errno  @25
       gcry_err_code_to_errno  @26
       gcry_err_make_from_errno  @27
       gcry_error_from_errno  @28
 
       gcry_sexp_new  @29
       gcry_sexp_create  @30
       gcry_sexp_sscan  @31
       gcry_sexp_build  @32
       gcry_sexp_build_array  @33
       gcry_sexp_release  @34
       gcry_sexp_canon_len  @35
       gcry_sexp_sprint  @36
       gcry_sexp_dump  @37
       gcry_sexp_cons  @38
       gcry_sexp_alist  @39
       gcry_sexp_vlist  @40
       gcry_sexp_append  @41
       gcry_sexp_prepend  @42
       gcry_sexp_find_token  @43
       gcry_sexp_length  @44
       gcry_sexp_nth  @45
       gcry_sexp_car  @46
       gcry_sexp_cdr  @47
       gcry_sexp_cadr  @48
       gcry_sexp_nth_data  @49
       gcry_sexp_nth_mpi  @50
 
       gcry_mpi_new  @51
       gcry_mpi_snew  @52
       gcry_mpi_release  @53
       gcry_mpi_copy  @54
       gcry_mpi_set  @55
       gcry_mpi_set_ui  @56
       gcry_mpi_swap  @57
       gcry_mpi_cmp  @58
       gcry_mpi_cmp_ui  @59
       gcry_mpi_scan  @60
       gcry_mpi_print  @61
       gcry_mpi_aprint  @62
       gcry_mpi_dump  @63
       gcry_mpi_add  @64
       gcry_mpi_add_ui  @65
       gcry_mpi_addm  @66
       gcry_mpi_sub  @67
       gcry_mpi_sub_ui  @68
       gcry_mpi_subm  @69
       gcry_mpi_mul  @70
       gcry_mpi_mul_ui  @71
       gcry_mpi_mulm  @72
       gcry_mpi_mul_2exp  @73
       gcry_mpi_div  @74
       gcry_mpi_mod  @75
       gcry_mpi_powm  @76
       gcry_mpi_gcd  @77
       gcry_mpi_invm  @78
       gcry_mpi_get_nbits  @79
       gcry_mpi_test_bit  @80
       gcry_mpi_set_bit  @81
       gcry_mpi_clear_bit  @82
       gcry_mpi_set_highbit  @83
       gcry_mpi_clear_highbit  @84
       gcry_mpi_rshift  @85
       gcry_mpi_set_opaque  @86
       gcry_mpi_get_opaque  @87
       gcry_mpi_set_flag  @88
       gcry_mpi_clear_flag  @89
       gcry_mpi_get_flag  @90
       gcry_mpi_get_ui    @91
 
       gcry_cipher_open  @92
       gcry_cipher_close  @93
       gcry_cipher_ctl  @94
       gcry_cipher_info  @95
       gcry_cipher_algo_info  @96
       gcry_cipher_algo_name  @97
       gcry_cipher_map_name  @98
       gcry_cipher_mode_from_oid  @99
       gcry_cipher_encrypt  @100
       gcry_cipher_decrypt  @101
       gcry_cipher_get_algo_keylen  @102
       gcry_cipher_get_algo_blklen  @103
 
 ;; @104 used to be part of the module register interface
 
       gcry_pk_encrypt  @105
       gcry_pk_decrypt  @106
       gcry_pk_sign  @107
       gcry_pk_verify  @108
       gcry_pk_testkey  @109
       gcry_pk_genkey  @110
       gcry_pk_ctl  @111
       gcry_pk_algo_info  @112
       gcry_pk_algo_name  @113
       gcry_pk_map_name  @114
       gcry_pk_get_nbits  @115
       gcry_pk_get_keygrip  @116
 
 ;; @117 used to be part of the module register interface
 
 ;;
 ;; 118 to 142 were used in previous Libgcrypt versions for the gcry_ac
 ;; interface
 ;;
 
       gcry_md_open  @143
       gcry_md_close  @144
       gcry_md_enable  @145
       gcry_md_copy  @146
       gcry_md_reset  @147
       gcry_md_ctl  @148
       gcry_md_write  @149
       gcry_md_read  @150
       gcry_md_hash_buffer  @151
       gcry_md_get_algo  @152
       gcry_md_get_algo_dlen  @153
       gcry_md_is_enabled  @154
       gcry_md_is_secure  @155
       gcry_md_info  @156
       gcry_md_algo_info  @157
       gcry_md_algo_name  @158
       gcry_md_map_name  @159
       gcry_md_setkey  @160
 ;; @161 used to be part of the module register interface
       gcry_randomize  @162
       gcry_random_add_bytes  @163
       gcry_random_bytes  @164
       gcry_random_bytes_secure  @165
       gcry_mpi_randomize  @166
 
       gcry_prime_generate  @167
       gcry_prime_group_generator  @168
       gcry_prime_release_factors  @169
       gcry_prime_check  @170
 
       gcry_create_nonce  @171
 
       gcry_md_debug  @172
 
 ;; @173 used to be part of the module register interface
 ;; @174 used to be part of the module register interface
 ;; @175 used to be part of the module register interface
 ;; @176 used to be part of the module register interface
 ;; @177 used to be part of the module register interface
 ;; @178 used to be part of the module register interface
 ;;
 ;; @179 to @186 used to be part of the removed gcry_ac interface
 ;;
 
       gcry_sexp_nth_string  @187
 
       gcry_cipher_setkey    @188
       gcry_cipher_setiv     @189
       gcry_cipher_setctr    @190
 
       gcry_mpi_lshift       @191
 
       gcry_pk_get_curve     @192
       gcry_pk_get_param     @193
 
       gcry_kdf_derive       @194
 
       gcry_mpi_snatch       @195
 
       gcry_mpi_point_new        @196
       gcry_mpi_point_release    @197
       gcry_mpi_point_get        @198
       gcry_mpi_point_snatch_get @199
       gcry_mpi_point_set        @200
       gcry_mpi_point_snatch_set @201
 
       gcry_ctx_release          @202
 
       gcry_mpi_ec_new           @203
       gcry_mpi_ec_get_mpi       @204
       gcry_mpi_ec_get_point     @205
       gcry_mpi_ec_set_mpi       @206
       gcry_mpi_ec_set_point     @207
       gcry_mpi_ec_get_affine    @208
       gcry_mpi_ec_dup           @209
       gcry_mpi_ec_add           @210
       gcry_mpi_ec_mul           @211
 
       gcry_pubkey_get_sexp      @212
 
       _gcry_mpi_get_const       @213
 
       gcry_sexp_nth_buffer      @214
 
       gcry_mpi_is_neg           @215
       gcry_mpi_neg              @216
       gcry_mpi_abs              @217
 
       gcry_mpi_ec_curve_point   @218
 
       gcry_md_hash_buffers      @219
 
       gcry_log_debug            @220
       gcry_log_debughex         @221
       gcry_log_debugmpi         @222
       gcry_log_debugpnt         @223
       gcry_log_debugsxp         @224
 
       gcry_sexp_extract_param   @225
 
       gcry_cipher_authenticate  @226
       gcry_cipher_gettag        @227
       gcry_cipher_checktag      @228
 
       gcry_mpi_set_opaque_copy  @229
 
       gcry_mac_algo_info        @230
       gcry_mac_algo_name        @231
       gcry_mac_map_name         @232
       gcry_mac_get_algo_maclen  @233
       gcry_mac_get_algo_keylen  @234
       gcry_mac_open             @235
       gcry_mac_close            @236
       gcry_mac_setkey           @237
       gcry_mac_setiv            @238
       gcry_mac_write            @239
       gcry_mac_read             @240
       gcry_mac_verify           @241
       gcry_mac_ctl              @242
       gcry_mac_get_algo         @243
 
       gcry_mpi_ec_sub           @244
 
       gcry_md_extract           @245
 
       gcry_mpi_ec_decode_point  @246
 
       gcry_get_config           @247
 
       gcry_mpi_point_copy       @248
 
       gcry_ecc_get_algo_keylen  @249
       gcry_ecc_mul_point        @250
 
       gcry_pk_hash_sign         @255
       gcry_pk_hash_verify       @256
       gcry_pk_random_override_new @257
 
       gcry_kdf_open             @258
       gcry_kdf_compute          @259
       gcry_kdf_final            @260
       gcry_kdf_close            @261
 
       gcry_cipher_setup_geniv   @262
       gcry_cipher_geniv         @263
 
 ;; end of file with public symbols for Windows.
diff --git a/src/libgcrypt.vers b/src/libgcrypt.vers
index 62b0e4ee..8927d48d 100644
--- a/src/libgcrypt.vers
+++ b/src/libgcrypt.vers
@@ -1,134 +1,134 @@
 # libgcrypt.vers  - What symbols to export                  -*- std -*-
 # Copyright (C) 2002, 2004, 2008, 2011 Free Software Foundation, Inc.
 #
 # This file is part of Libgcrypt.
 #
 # Libgcrypt is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as
 # published by the Free Software Foundation; either version 2.1 of
 # the License, or (at your option) any later version.
 #
 # Libgcrypt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+# License along with this program; if not, see <https://www.gnu.org/licenses/>.
+# SPDX-License-Identifier: LGPL-2.1-or-later
 
 # NOTE: When adding new functions, please make sure to add them to
 # visibility.h and libgcrypt.def as well.
 
 GCRYPT_1.6 {
   global:
     gcry_check_version; gcry_control;
     gcry_set_allocation_handler; gcry_set_fatalerror_handler;
     gcry_set_gettext_handler; gcry_set_log_handler;
     gcry_set_outofcore_handler; gcry_set_progress_handler;
 
     gcry_err_code_from_errno; gcry_err_code_to_errno;
     gcry_err_make_from_errno; gcry_error_from_errno;
     gcry_strerror; gcry_strsource;
 
     gcry_free; gcry_malloc; gcry_malloc_secure; gcry_calloc;
     gcry_calloc_secure; gcry_realloc; gcry_strdup; gcry_is_secure;
     gcry_xcalloc; gcry_xcalloc_secure; gcry_xmalloc;
     gcry_xmalloc_secure; gcry_xrealloc; gcry_xstrdup;
 
     gcry_md_algo_info; gcry_md_algo_name; gcry_md_close;
     gcry_md_copy; gcry_md_ctl; gcry_md_enable; gcry_md_get;
     gcry_md_get_algo; gcry_md_get_algo_dlen; gcry_md_hash_buffer;
     gcry_md_hash_buffers;
     gcry_md_info; gcry_md_is_enabled; gcry_md_is_secure;
     gcry_md_map_name; gcry_md_open; gcry_md_read; gcry_md_extract;
     gcry_md_reset; gcry_md_setkey;
     gcry_md_write; gcry_md_debug;
 
     gcry_cipher_algo_info; gcry_cipher_algo_name; gcry_cipher_close;
     gcry_cipher_ctl; gcry_cipher_decrypt; gcry_cipher_encrypt;
     gcry_cipher_get_algo_blklen; gcry_cipher_get_algo_keylen;
     gcry_cipher_info; gcry_cipher_map_name;
     gcry_cipher_mode_from_oid; gcry_cipher_open;
     gcry_cipher_setkey; gcry_cipher_setiv; gcry_cipher_setctr;
     gcry_cipher_authenticate; gcry_cipher_gettag; gcry_cipher_checktag;
 
     gcry_mac_algo_info; gcry_mac_algo_name; gcry_mac_map_name;
     gcry_mac_get_algo_maclen; gcry_mac_get_algo_keylen; gcry_mac_get_algo;
     gcry_mac_open; gcry_mac_close; gcry_mac_setkey; gcry_mac_setiv;
     gcry_mac_write; gcry_mac_read; gcry_mac_verify; gcry_mac_ctl;
 
     gcry_pk_algo_info; gcry_pk_algo_name; gcry_pk_ctl;
     gcry_pk_decrypt; gcry_pk_encrypt; gcry_pk_genkey;
     gcry_pk_get_keygrip; gcry_pk_get_nbits;
     gcry_pk_map_name; gcry_pk_register; gcry_pk_sign;
     gcry_pk_testkey; gcry_pk_verify;
     gcry_pk_get_curve; gcry_pk_get_param;
 
     gcry_pubkey_get_sexp;
 
     gcry_ecc_get_algo_keylen;
     gcry_ecc_mul_point;
 
     gcry_kdf_derive;
 
     gcry_prime_check; gcry_prime_generate;
     gcry_prime_group_generator; gcry_prime_release_factors;
 
     gcry_random_add_bytes; gcry_random_bytes; gcry_random_bytes_secure;
     gcry_randomize; gcry_create_nonce;
 
     gcry_sexp_alist; gcry_sexp_append; gcry_sexp_build;
     gcry_sexp_build_array; gcry_sexp_cadr; gcry_sexp_canon_len;
     gcry_sexp_car; gcry_sexp_cdr; gcry_sexp_cons; gcry_sexp_create;
     gcry_sexp_dump; gcry_sexp_find_token; gcry_sexp_length;
     gcry_sexp_new; gcry_sexp_nth; gcry_sexp_nth_buffer; gcry_sexp_nth_data;
     gcry_sexp_nth_mpi; gcry_sexp_prepend; gcry_sexp_release;
     gcry_sexp_sprint; gcry_sexp_sscan; gcry_sexp_vlist;
     gcry_sexp_nth_string; gcry_sexp_extract_param;
 
     gcry_mpi_is_neg; gcry_mpi_neg; gcry_mpi_abs;
     gcry_mpi_add; gcry_mpi_add_ui; gcry_mpi_addm; gcry_mpi_aprint;
     gcry_mpi_clear_bit; gcry_mpi_clear_flag; gcry_mpi_clear_highbit;
     gcry_mpi_cmp; gcry_mpi_cmp_ui; gcry_mpi_copy; gcry_mpi_div;
     gcry_mpi_dump; gcry_mpi_gcd; gcry_mpi_get_flag; gcry_mpi_get_nbits;
     gcry_mpi_get_opaque; gcry_mpi_invm; gcry_mpi_mod; gcry_mpi_mul;
     gcry_mpi_mul_2exp; gcry_mpi_mul_ui; gcry_mpi_mulm; gcry_mpi_new;
     gcry_mpi_powm; gcry_mpi_print; gcry_mpi_randomize; gcry_mpi_release;
     gcry_mpi_rshift; gcry_mpi_scan; gcry_mpi_set; gcry_mpi_set_bit;
     gcry_mpi_set_flag; gcry_mpi_set_highbit;
     gcry_mpi_set_opaque; gcry_mpi_set_opaque_copy;
     gcry_mpi_set_ui; gcry_mpi_snew; gcry_mpi_sub; gcry_mpi_sub_ui;
     gcry_mpi_subm; gcry_mpi_swap; gcry_mpi_test_bit;
     gcry_mpi_lshift; gcry_mpi_snatch;
     gcry_mpi_point_new; gcry_mpi_point_release;
     gcry_mpi_point_get; gcry_mpi_point_snatch_get;
     gcry_mpi_point_set; gcry_mpi_point_snatch_set;
     gcry_mpi_ec_new;
     gcry_mpi_ec_get_mpi; gcry_mpi_ec_get_point;
     gcry_mpi_ec_set_mpi; gcry_mpi_ec_set_point;
     gcry_mpi_ec_get_affine;
     gcry_mpi_ec_dup; gcry_mpi_ec_add; gcry_mpi_ec_sub; gcry_mpi_ec_mul;
     gcry_mpi_ec_curve_point; gcry_mpi_ec_decode_point;
     gcry_mpi_point_copy;
     gcry_mpi_get_ui;
 
     gcry_log_debug;
     gcry_log_debughex; gcry_log_debugmpi; gcry_log_debugpnt; gcry_log_debugsxp;
 
     gcry_get_config;
 
     _gcry_mpi_get_const;
 
     gcry_ctx_release;
 
     gcry_pk_hash_sign; gcry_pk_hash_verify; gcry_pk_random_override_new;
 
     gcry_kdf_open; gcry_kdf_compute; gcry_kdf_final; gcry_kdf_close;
 
     gcry_cipher_setup_geniv; gcry_cipher_geniv;
 
   local:
     *;
 
 };
diff --git a/src/missing-string.c b/src/missing-string.c
index 4756c00e..43407616 100644
--- a/src/missing-string.c
+++ b/src/missing-string.c
@@ -1,54 +1,54 @@
 /* missing-string.c - missing string utilities
  * Copyright (C) 1994, 1998, 1999, 2000, 2001,
  *               2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 
 #include "g10lib.h"
 
 
 #ifndef HAVE_STPCPY
 /* Fallback for systems without stpcpy: copy the NUL-terminated string B
  * to A and return a pointer to the terminating NUL stored in A.  */
 char *
 stpcpy(char *a,const char *b)
 {
     for( ; *b; a++, b++ )
       *a = *b;
     *a = 0;
 
     return a;
 }
 #endif
 
 
 #ifndef HAVE_STRCASECMP
 /* Fallback for systems without strcasecmp: compare the strings A and B
  * while ignoring case.  Returns 0 if they are equal, a negative value
  * if A sorts before B and a positive value otherwise.  The ordering is
  * that of the lower-cased strings.  The bytes are handled as unsigned
  * char because passing a negative char value to the <ctype.h>
  * functions is undefined.  */
 int
 strcasecmp( const char *a, const char *b )
 {
     const unsigned char *s = (const unsigned char *)a;
     const unsigned char *t = (const unsigned char *)b;
 
     for( ; *s && *t; s++, t++ ) {
       if( *s != *t && tolower(*s) != tolower(*t) )
           break;
     }
     /* Compare the folded bytes so that the sign of the result does not
      * depend on the case of the first differing characters.  */
     return tolower(*s) - tolower(*t);
 }
 #endif
diff --git a/src/mpi.h b/src/mpi.h
index c2ebd0da..3b5a0b2e 100644
--- a/src/mpi.h
+++ b/src/mpi.h
@@ -1,325 +1,325 @@
 /* mpi.h  -  Multi Precision Integers
  * Copyright (C) 1994, 1996, 1998,
  *               2001, 2002, 2003, 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  *
  * Note: This code is heavily based on the GNU MP Library.
  *	 Actually it's the same code with only minor changes in the
  *	 way the data is stored; this is to support the abstraction
  *	 of an optional secure memory allocation which may be used
  *	 to avoid revealing of sensitive data due to paging etc.
  */
 
 #ifndef G10_MPI_H
 #define G10_MPI_H
 
 #include <config.h>
 #include <stdio.h>
 #include <string.h>
 
 #include "types.h"
 #include "../mpi/mpi-asm-defs.h"
 
 #include "g10lib.h"
 
 #ifndef _GCRYPT_IN_LIBGCRYPT
 #error this file should only be used inside libgcrypt
 #endif
 
 /* Select the limb type.  mpi_limb_t (and its signed counterpart
    mpi_limb_signed_t) becomes the first C integer type, tried in the
    order int, long, long long, short, whose size equals
    BYTES_PER_MPI_LIMB.  Compilation stops if none matches.  The whole
    selection is skipped when BITS_PER_MPI_LIMB is already defined.  */
 #ifndef BITS_PER_MPI_LIMB
 #if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_INT
   typedef unsigned int mpi_limb_t;
   typedef   signed int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG
   typedef unsigned long int mpi_limb_t;
   typedef   signed long int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG
   typedef unsigned long long int mpi_limb_t;
   typedef   signed long long int mpi_limb_signed_t;
 #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_SHORT
   typedef unsigned short int mpi_limb_t;
   typedef   signed short int mpi_limb_signed_t;
 #else
 #error BYTES_PER_MPI_LIMB does not match any C type
 #endif
 /* Number of bits in one limb, derived from its size in bytes.  */
 #define BITS_PER_MPI_LIMB    (8*BYTES_PER_MPI_LIMB)
 #endif /*BITS_PER_MPI_LIMB*/
 
 /* Evaluates to the result of _gcry_get_debug_flag (2).  The expansion
    deliberately carries no trailing semicolon so that the macro can be
    used inside an expression, e.g. "if (DBG_MPI)".  */
 #define DBG_MPI     _gcry_get_debug_flag( 2 )
 
 /* The object behind gcry_mpi_t: a limb array plus bookkeeping.  The
    bits of FLAGS are tested by the mpi_is_secure, mpi_is_opaque,
    mpi_is_immutable and mpi_is_const macros further down.  */
 struct gcry_mpi
 {
   int alloced;         /* Array size (# of allocated limbs). */
   int nlimbs;          /* Number of valid limbs. */
   int sign;	       /* Indicates a negative number and is also used
 		          for opaque MPIs to store the length.  */
   unsigned int flags; /* Bit 0: Array to be allocated in secure memory space.*/
                       /* Bit 2: The limb is a pointer to some m_alloced data.*/
                       /* Bit 4: Immutable MPI - the MPI may not be modified.  */
                       /* Bit 5: Constant MPI - the MPI will not be freed.  */
   mpi_limb_t *d;      /* Array with the limbs */
 };
 
 /* Null value for an MPI handle.  */
 #define MPI_NULL NULL
 
 /* Direct field accessors; A must be a valid, non-NULL MPI because the
    argument is dereferenced without a check.  */
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
 #define mpi_has_sign(a)	      ((a)->sign)
 
 /*-- mpiutil.c --*/
 
 /* Short names for the allocation helpers.  With M_DEBUG defined they
    map to the _gcry_mpi_debug_* variants, which take an additional info
    string built by M_DBGINFO from the current line number; otherwise
    they map directly to the plain _gcry_mpi_* functions.  */
 #ifdef M_DEBUG
 # define mpi_alloc(n)	_gcry_mpi_debug_alloc((n), M_DBGINFO( __LINE__ ) )
 # define mpi_alloc_secure(n)  _gcry_mpi_debug_alloc_secure((n), M_DBGINFO( __LINE__ ) )
 # define mpi_free(a)	_gcry_mpi_debug_free((a), M_DBGINFO(__LINE__) )
 # define mpi_resize(a,b) _gcry_mpi_debug_resize((a),(b), M_DBGINFO(__LINE__) )
 # define mpi_copy(a)	  _gcry_mpi_debug_copy((a), M_DBGINFO(__LINE__) )
   gcry_mpi_t _gcry_mpi_debug_alloc( unsigned nlimbs, const char *info );
   gcry_mpi_t _gcry_mpi_debug_alloc_secure( unsigned nlimbs, const char *info );
   void _gcry_mpi_debug_free( gcry_mpi_t a, const char *info );
   void _gcry_mpi_debug_resize( gcry_mpi_t a, unsigned nlimbs, const char *info );
   gcry_mpi_t  _gcry_mpi_debug_copy( gcry_mpi_t a, const char *info	);
 #else
 # define mpi_alloc(n)	       _gcry_mpi_alloc((n) )
 # define mpi_alloc_secure(n)  _gcry_mpi_alloc_secure((n) )
 # define mpi_free(a)	       _gcry_mpi_free((a) )
 # define mpi_resize(a,b)      _gcry_mpi_resize((a),(b))
 # define mpi_copy(a)	       _gcry_mpi_copy((a))
   gcry_mpi_t  _gcry_mpi_alloc( unsigned nlimbs );
   gcry_mpi_t  _gcry_mpi_alloc_secure( unsigned nlimbs );
   void _gcry_mpi_free( gcry_mpi_t a );
   void _gcry_mpi_resize( gcry_mpi_t a, unsigned nlimbs );
   gcry_mpi_t  _gcry_mpi_copy( gcry_mpi_t a );
 #endif
 
 /* NOTE(review): presumably called when a write to an immutable MPI is
    attempted; the implementation is not visible in this header.  */
 void _gcry_mpi_immutable_failed (void);
 #define mpi_immutable_failed() _gcry_mpi_immutable_failed ()
 
 /* Tests for the bits of the FLAGS field of struct gcry_mpi: 32 is the
    constant bit, 16 the immutable bit, 4 the opaque bit and 1 the
    secure-memory bit.  mpi_is_opaque and mpi_is_secure accept a NULL
    argument and then yield false; mpi_is_const and mpi_is_immutable
    dereference their argument unconditionally.  */
 #define mpi_is_const(a)       ((a)->flags&32)
 #define mpi_is_immutable(a)   ((a)->flags&16)
 #define mpi_is_opaque(a)      ((a) && ((a)->flags&4))
 #define mpi_is_secure(a)      ((a) && ((a)->flags&1))
 /* Short names; each macro forwards its arguments unchanged to the
    _gcry_mpi_* function of the same name.  */
 #define mpi_clear(a)          _gcry_mpi_clear ((a))
 #define mpi_alloc_like(a)     _gcry_mpi_alloc_like((a))
 
 #define mpi_alloc_set_ui(a)   _gcry_mpi_alloc_set_ui ((a))
 #define mpi_const(n)          _gcry_mpi_const ((n))
 #define mpi_swap_cond(a,b,sw)  _gcry_mpi_swap_cond ((a),(b),(sw))
 #define mpi_set_cond(w,u,set)  _gcry_mpi_set_cond ((w),(u),(set))
 #define mpi_set_bit_cond(a,n,set) _gcry_mpi_set_bit_cond ((a),(n),(set))
 
 void _gcry_mpi_clear( gcry_mpi_t a );
 /* NOTE(review): the third parameter is named "swap" here although the
    mpi_set_cond macro above calls it "set"; confirm against the
    definition in mpiutil.c.  */
 gcry_mpi_t _gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u,
                                unsigned long swap);
 gcry_mpi_t  _gcry_mpi_alloc_like( gcry_mpi_t a );
 gcry_mpi_t  _gcry_mpi_alloc_set_ui( unsigned long u);
 void _gcry_mpi_swap( gcry_mpi_t a, gcry_mpi_t b);
 void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap);
 void _gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set);
 /* NBITS is presumably a size hint for the new MPI -- TODO confirm.  */
 gcry_mpi_t _gcry_mpi_new (unsigned int nbits);
 gcry_mpi_t _gcry_mpi_snew (unsigned int nbits);
 gcry_mpi_t _gcry_mpi_set_opaque_copy (gcry_mpi_t a,
                                       const void *p, unsigned int nbits);
 void *_gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits);
 int _gcry_mpi_is_neg (gcry_mpi_t a);
 void _gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u);
 void _gcry_mpi_abs (gcry_mpi_t w);
 
 /* Constants used to return constant MPIs.  See _gcry_mpi_init if you
    want to add more constants.  MPI_NUMBER_OF_CONSTANTS has to match
    the number of enumerators below (currently six).  */
 #define MPI_NUMBER_OF_CONSTANTS 6
 enum gcry_mpi_constants
   {
     MPI_C_ZERO,
     MPI_C_ONE,
     MPI_C_TWO,
     MPI_C_THREE,
     MPI_C_FOUR,
     MPI_C_EIGHT
   };
 
 
 /* Look up a constant MPI by its enumerator NO; also reachable through
    the mpi_const macro.  */
 gcry_mpi_t _gcry_mpi_const (enum gcry_mpi_constants no);
 
 
 /*-- mpicoder.c --*/
 /* Conversion between MPIs and byte buffers.  NOTE(review): the exact
    meaning of FILL_LE and EXTRAALLOC and the ownership of the returned
    buffers are defined in mpicoder.c and cannot be told from this
    header; check there before use.  */
 void  _gcry_log_mpidump( const char *text, gcry_mpi_t a );
 u32   _gcry_mpi_get_keyid( gcry_mpi_t a, u32 *keyid );
 byte *_gcry_mpi_get_buffer (gcry_mpi_t a, unsigned int fill_le,
                             unsigned int *r_nbytes, int *sign);
 byte *_gcry_mpi_get_buffer_extra (gcry_mpi_t a, unsigned int fill_le,
                                   int extraalloc,
                                   unsigned int *r_nbytes, int *sign);
 byte *_gcry_mpi_get_secure_buffer (gcry_mpi_t a, unsigned int fill_le,
                                    unsigned *r_nbytes, int *sign);
 void  _gcry_mpi_set_buffer ( gcry_mpi_t a, const void *buffer,
                              unsigned int nbytes, int sign );
 gpg_err_code_t _gcry_mpi_to_octet_string (unsigned char **r_frame,
                                           void *space,
                                           gcry_mpi_t value, size_t nbytes);
 
 /*-- mpi-div.c --*/
 /* Short names for the division helpers; each macro forwards its
    arguments unchanged to the _gcry_mpi_* function of the same name.  */
 #define mpi_fdiv_r_ui(a,b,c)   _gcry_mpi_fdiv_r_ui((a),(b),(c))
 #define mpi_fdiv_r(a,b,c)      _gcry_mpi_fdiv_r((a),(b),(c))
 #define mpi_fdiv_q(a,b,c)      _gcry_mpi_fdiv_q((a),(b),(c))
 #define mpi_fdiv_qr(a,b,c,d)   _gcry_mpi_fdiv_qr((a),(b),(c),(d))
 #define mpi_tdiv_r(a,b,c)      _gcry_mpi_tdiv_r((a),(b),(c))
 #define mpi_tdiv_qr(a,b,c,d)   _gcry_mpi_tdiv_qr((a),(b),(c),(d))
 #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c))
 #define mpi_divisible_ui(a,b)  _gcry_mpi_divisible_ui((a),(b))
 
 /* In all of these the result objects (REM, QUOT, W) come first and the
    operands follow.  NOTE(review): "fdiv"/"tdiv" presumably denote
    floor and truncating division as in GNU MP -- confirm in mpi-div.c.  */
 unsigned long _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, unsigned long divisor );
 void  _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor );
 void  _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor );
 void  _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor );
 void  _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den);
 void  _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den);
 void  _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count );
 int   _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor );
 
 
 /*-- mpi-mod.c --*/
 /* Short names for the Barrett reduction helpers declared below.  */
 #define mpi_barrett_init(m,f)     _gcry_mpi_barrett_init ((m),(f))
 #define mpi_barrett_free(c)       _gcry_mpi_barrett_free ((c))
 #define mpi_mod_barrett(r,a,c)    _gcry_mpi_mod_barrett ((r), (a), (c))
 #define mpi_mul_barrett(r,u,v,c)  _gcry_mpi_mul_barrett ((r), (u), (v), (c))
 
 /* Context used with Barrett reduction.  The structure is opaque here;
    only the pointer type mpi_barrett_t is exposed.  */
 struct barrett_ctx_s;
 typedef struct barrett_ctx_s *mpi_barrett_t;
 
 /* Create a context for the modulus M, release it again, and use it for
    reduction and multiplication.  NOTE(review): COPY presumably selects
    whether M is copied into the context -- confirm in mpi-mod.c.  */
 mpi_barrett_t _gcry_mpi_barrett_init (gcry_mpi_t m, int copy);
 void _gcry_mpi_barrett_free (mpi_barrett_t ctx);
 void _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, mpi_barrett_t ctx);
 void _gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v,
                             mpi_barrett_t ctx);
 
 
 /*-- mpi-mpow.c --*/
 /* BASEARRAY and EXPARRAY are arrays of MPIs.  NOTE(review): how their
    end is marked is not visible here -- confirm in mpi-mpow.c.  */
 #define mpi_mulpowm(a,b,c,d) _gcry_mpi_mulpowm ((a),(b),(c),(d))
 void _gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, gcry_mpi_t *exparray, gcry_mpi_t mod);
 
 /*-- mpi-scan.c --*/
 /* Byte-wise access to an MPI (IDX selects the byte) and a count of its
    trailing zeros; only mpi_trailing_zeros has a short macro name.  */
 #define mpi_trailing_zeros(a) _gcry_mpi_trailing_zeros ((a))
 int      _gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx );
 void     _gcry_mpi_putbyte( gcry_mpi_t a, unsigned idx, int value );
 unsigned _gcry_mpi_trailing_zeros( gcry_mpi_t a );
 
 /*-- mpi-bit.c --*/
 /* Short name for _gcry_mpi_normalize.  */
 #define mpi_normalize(a)       _gcry_mpi_normalize ((a))
 
 void _gcry_mpi_normalize( gcry_mpi_t a );
 
 /*-- ec.c --*/
 
 /* Object to represent a point in projective coordinates.  Each of the
    three coordinates is a separate MPI.  */
 struct gcry_mpi_point
 {
   gcry_mpi_t x;
   gcry_mpi_t y;
   gcry_mpi_t z;
 };
 /* Aliases: mpi_point_struct names the structure itself, mpi_point_t a
    pointer to it.  */
 typedef struct gcry_mpi_point mpi_point_struct;
 typedef struct gcry_mpi_point *mpi_point_t;
 
 /* Helpers working on a caller-provided point object.  NOTE(review):
    judging by the names, init/free_parts manage only the coordinates
    and not the structure, and get/snatch transfer coordinates between
    POINT and X, Y, Z -- confirm in ec.c.  */
 void _gcry_mpi_point_init (mpi_point_t p);
 void _gcry_mpi_point_free_parts (mpi_point_t p);
 void _gcry_mpi_get_point (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
                           mpi_point_t point);
 void _gcry_mpi_snatch_point (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
                              mpi_point_t point);
 
 
 /* Models describing an elliptic curve.  The enumerators are numbered
    consecutively starting with MPI_EC_WEIERSTRASS = 0.  */
 enum gcry_mpi_ec_models
   {
     /* The Short Weierstrass equation is
           y^2 = x^3 + ax + b
      */
     MPI_EC_WEIERSTRASS = 0,
     /* The Montgomery equation is
           by^2 = x^3 + ax^2 + x
      */
     MPI_EC_MONTGOMERY,
     /* The Twisted Edwards equation is
           ax^2 + y^2 = 1 + bx^2y^2
        Note that we use 'b' instead of the commonly used 'd'.  */
     MPI_EC_EDWARDS
   };
 
 /* Dialects used with elliptic curves.  It is easier to keep the
    definition here than in ecc-common.h.  The enumerators are numbered
    consecutively starting with ECC_DIALECT_STANDARD = 0.  */
 enum ecc_dialects
   {
     ECC_DIALECT_STANDARD = 0,
     ECC_DIALECT_ED25519,
     ECC_DIALECT_SAFECURVE
   };
 
 
 /* Logging helper for points; log_printpnt is its short name.  */
 void _gcry_mpi_point_log (const char *name, mpi_point_t point, mpi_ec_t ctx);
 #define log_printpnt(a,p,c) _gcry_mpi_point_log ((a), (p), (c))
 
 /* Create an EC context from a curve MODEL, a DIALECT, FLAGS and the
    parameters P, A and B.  The first variant returns the internal
    context directly, the second stores a gcry_ctx_t at R_CTX and
    returns an error code.  _gcry_mpi_ec_free takes the internal
    context type.  */
 mpi_ec_t _gcry_mpi_ec_p_internal_new (enum gcry_mpi_ec_models model,
                                       enum ecc_dialects dialect,
                                       int flags,
                                       gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b);
 gpg_err_code_t _gcry_mpi_ec_p_new (gcry_ctx_t *r_ctx,
                                    enum gcry_mpi_ec_models model,
                                    enum ecc_dialects dialect,
                                    int flags,
                                    gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b);
 void _gcry_mpi_ec_free (mpi_ec_t ctx);
 
 /* Point arithmetic within the curve context CTX; RESULT is always the
    first argument.  */
 void _gcry_mpi_ec_dup_point (mpi_point_t result,
                              mpi_point_t point, mpi_ec_t ctx);
 void _gcry_mpi_ec_add_points (mpi_point_t result,
                               mpi_point_t p1, mpi_point_t p2,
                               mpi_ec_t ctx);
 void _gcry_mpi_ec_sub_points (mpi_point_t result,
                               mpi_point_t p1, mpi_point_t p2,
                               mpi_ec_t ctx);
 void _gcry_mpi_ec_mul_point (mpi_point_t result,
                              gcry_mpi_t scalar, mpi_point_t point,
                              mpi_ec_t ctx);
 /* Predicates on POINT.  NOTE(review): the meaning of the int return
    values is not visible in this header -- confirm in ec.c.  */
 int  _gcry_mpi_ec_curve_point (gcry_mpi_point_t point, mpi_ec_t ctx);
 int _gcry_mpi_ec_bad_point (gcry_mpi_point_t point, mpi_ec_t ctx);
 
 gcry_mpi_t _gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ectx);
 
 /* Get or set a named parameter of the context CTX, either as an MPI or
    as a point.  NOTE(review): COPY presumably requests a copy of the
    stored value -- confirm in ec.c.  */
 gcry_mpi_t _gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy);
 gcry_mpi_point_t _gcry_mpi_ec_get_point (const char *name,
                                          gcry_ctx_t ctx, int copy);
 gpg_err_code_t _gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue,
                                      gcry_ctx_t ctx);
 gpg_err_code_t _gcry_mpi_ec_set_point (const char *name,
                                        gcry_mpi_point_t newvalue,
                                        gcry_ctx_t ctx);
 gpg_err_code_t _gcry_mpi_ec_decode_point (mpi_point_t result,
                                           gcry_mpi_t value, mpi_ec_t ec);
 
 /*-- ecc-curves.c --*/
 /* Create an EC context from the S-expression KEYPARAM and/or the curve
    name CURVENAME.  The first variant stores a gcry_ctx_t at R_CTX, the
    second an internal context at R_EC together with flags at R_FLAGS.  */
 gpg_err_code_t _gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
                                  gcry_sexp_t keyparam, const char *curvename);
 gpg_err_code_t _gcry_mpi_ec_internal_new (mpi_ec_t *r_ec, int *r_flags,
                                           const char *name_op,
                                           gcry_sexp_t keyparam,
                                           const char *curvename);
 
 
 
 #endif /*G10_MPI_H*/
diff --git a/src/secmem.h b/src/secmem.h
index 8ad6ef1a..4dd5c318 100644
--- a/src/secmem.h
+++ b/src/secmem.h
@@ -1,42 +1,42 @@
 /* secmem.h -  internal definitions for secmem
  *	Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifndef G10_SECMEM_H
 #define G10_SECMEM_H 1
 
 /* Interface of the secure memory allocator.  NOTE(review): the
    semantics of NPOOL, XHINT and the int return of _gcry_secmem_free
    are defined in secmem.c and are not visible in this header.  */
 void _gcry_secmem_init (size_t npool);
 void _gcry_secmem_term (void);
 void *_gcry_secmem_malloc (size_t size, int xhint) _GCRY_GCC_ATTR_MALLOC;
 void *_gcry_secmem_realloc (void *a, size_t newsize, int xhint);
 int  _gcry_secmem_free (void *a);
 void _gcry_secmem_dump_stats (int extended);
 void _gcry_secmem_set_auto_expand (unsigned int chunksize);
 /* The flags are a bit mask of the GCRY_SECMEM_FLAG_* values.  */
 void _gcry_secmem_set_flags (unsigned flags);
 unsigned _gcry_secmem_get_flags(void);
 int _gcry_private_is_secure (const void *p);
 
 /* Flags for _gcry_secmem_{set,get}_flags.  Each flag occupies its own
    bit (bits 0 to 4), so the values can be combined with bitwise OR.  */
 #define GCRY_SECMEM_FLAG_NO_WARNING      (1 << 0)
 #define GCRY_SECMEM_FLAG_SUSPEND_WARNING (1 << 1)
 #define GCRY_SECMEM_FLAG_NOT_LOCKED      (1 << 2)
 #define GCRY_SECMEM_FLAG_NO_MLOCK        (1 << 3)
 #define GCRY_SECMEM_FLAG_NO_PRIV_DROP    (1 << 4)
 
 #endif /* G10_SECMEM_H */
diff --git a/src/stdmem.h b/src/stdmem.h
index ba885005..30ad1fcd 100644
--- a/src/stdmem.h
+++ b/src/stdmem.h
@@ -1,29 +1,29 @@
 /* stdmem.h -  internal definitions for stdmem
  *	Copyright (C) 2000, 2002, 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifndef G10_STDMEM_H
 #define G10_STDMEM_H 1
 
 /* Internal allocation primitives.  The two malloc variants carry the
    _GCRY_GCC_ATTR_MALLOC attribute.  NOTE(review): the meaning of XHINT
    is defined by the implementation and is not visible in this header.  */
 void *_gcry_private_malloc (size_t n) _GCRY_GCC_ATTR_MALLOC;
 void *_gcry_private_malloc_secure (size_t n, int xhint) _GCRY_GCC_ATTR_MALLOC;
 void *_gcry_private_realloc (void *a, size_t n, int xhint);
 void _gcry_private_free (void *a);
 
 #endif /* G10_STDMEM_H */
diff --git a/src/types.h b/src/types.h
index 8b69dce9..deee831c 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,136 +1,136 @@
 /* types.h - some common typedefs
  *	Copyright (C) 1998, 2000, 2002, 2003 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifndef GCRYPT_TYPES_H
 #define GCRYPT_TYPES_H
 
 #ifndef _GCRYPT_CONFIG_H_INCLUDED
 # error config.h must be included before types.h
 #endif
 
 /* The AC_CHECK_SIZEOF() in configure fails for some machines.
  * We provide some fallback values here: a SIZEOF_* macro that is
  * undefined or zero is redefined to 2 for unsigned short and to 4 for
  * unsigned int and unsigned long.  */
 #if !SIZEOF_UNSIGNED_SHORT
 # undef SIZEOF_UNSIGNED_SHORT
 # define SIZEOF_UNSIGNED_SHORT 2
 #endif
 #if !SIZEOF_UNSIGNED_INT
 # undef SIZEOF_UNSIGNED_INT
 # define SIZEOF_UNSIGNED_INT 4
 #endif
 #if !SIZEOF_UNSIGNED_LONG
 # undef SIZEOF_UNSIGNED_LONG
 # define SIZEOF_UNSIGNED_LONG 4
 #endif
 
 
 #include <sys/types.h>
 
 /* Provide uintptr_t */
 #ifdef HAVE_STDINT_H
 # include <stdint.h> /* uintptr_t */
 #elif defined(HAVE_INTTYPES_H)
 # include <inttypes.h>
 #else
 /* In this case, uintptr_t is provided by config.h. */
 #endif
 
 
 
 /* Each of the following typedefs is provided only if the matching
    HAVE_* macro is not yet defined; the macro is defined afterwards so
    that the typedef is not repeated.  */
 #ifndef HAVE_BYTE
 # undef byte	/* In case there is a macro with that name.  */
 # if !(defined(_WIN32) && defined(cbNDRContext))
    /* Windows typedefs byte in the rpc headers.  Avoid warning about
       double definition.  */
    typedef unsigned char byte;
 # endif
 # define HAVE_BYTE
 #endif
 
 #ifndef HAVE_USHORT
 # undef ushort  /* In case there is a macro with that name.  */
   typedef unsigned short ushort;
 # define HAVE_USHORT
 #endif
 
 /* u16: unsigned int if that is 2 bytes wide, else unsigned short if
    that is 2 bytes wide, else a compile-time error.  */
 #ifndef HAVE_U16
 # undef u16	/* In case there is a macro with that name.  */
 # if SIZEOF_UNSIGNED_INT == 2
    typedef unsigned int   u16;
 # elif SIZEOF_UNSIGNED_SHORT == 2
    typedef unsigned short u16;
 # else
 #  error no typedef for u16
 # endif
 # define HAVE_U16
 #endif
 
 /* u32: unsigned int if that is 4 bytes wide, else unsigned long if
    that is 4 bytes wide, else a compile-time error.  */
 #ifndef HAVE_U32
 # undef u32	/* In case there is a macro with that name.  */
 # if SIZEOF_UNSIGNED_INT == 4
    typedef unsigned int  u32;
 # elif SIZEOF_UNSIGNED_LONG == 4
    typedef unsigned long u32;
 # else
 #  error no typedef for u32
 # endif
 # define HAVE_U32
 #endif
 
 /*
  * Warning: Some systems segfault when this u64 typedef and
  * the dummy code in cipher/md.c is not available.  Examples are
  * Solaris and IRIX.
  *
  * u64 becomes the first 8 byte wide type out of uint64_t, unsigned
  * int, unsigned long and unsigned long long.  U64_C(c) appends the
  * matching suffix to the integer constant C.  Note that U64_C is
  * defined only inside this block, i.e. not when HAVE_U64 was already
  * defined on entry.
  */
 #ifndef HAVE_U64
 # undef u64	/* In case there is a macro with that name.  */
 # if SIZEOF_UINT64_T == 8
    typedef uint64_t u64;
 #  define U64_C(c) (UINT64_C(c))
 #  define HAVE_U64
 # elif SIZEOF_UNSIGNED_INT == 8
    typedef unsigned int u64;
 #  define U64_C(c) (c ## U)
 #  define HAVE_U64
 # elif SIZEOF_UNSIGNED_LONG == 8
    typedef unsigned long u64;
 #  define U64_C(c) (c ## UL)
 #  define HAVE_U64
 # elif SIZEOF_UNSIGNED_LONG_LONG == 8
    typedef unsigned long long u64;
 #  define U64_C(c) (c ## ULL)
 #  define HAVE_U64
 # else
 #  error No way to declare a 64 bit integer type
 # endif
 #endif
 
 /* Union of the basic C types plus u64.  NOTE(review): judging by the
    name it is meant to give an object the strictest alignment needed
    by any of these members -- confirm at the places where it is used.  */
 typedef union
 {
   int a;
   short b;
   char c[1];
   long d;
   u64 e;
   float f;
   double g;
 } PROPERLY_ALIGNED_TYPE;
 
 #endif /*GCRYPT_TYPES_H*/
diff --git a/tests/aeswrap.c b/tests/aeswrap.c
index e5ecad75..c9465171 100644
--- a/tests/aeswrap.c
+++ b/tests/aeswrap.c
@@ -1,470 +1,470 @@
 /* aeswrap.c -  AESWRAP mode regression tests
  *	Copyright (C) 2009 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 
 #define PGM "aeswrap"
 #include "t-common.h"
 
 
 /* Print LABEL followed by the LEN bytes at BUF as hex to stderr.  */
 static void
 check_one_dump (const char *label, const void *buf, size_t len)
 {
   const unsigned char *s = buf;
   size_t i;
 
   fputs (label, stderr);
   for (i = 0; i < len; i++)
     fprintf (stderr, "%02x ", s[i]);
 }
 
 
 /* Run one decryption of the EXPECTEDLEN bytes at EXPECTED into OUTBUF,
    which has room for OUTBUFSIZE bytes.  *R_OUTLEN is set to the
    output length passed to the decryption, which is EXPECTEDLEN - 8.
    With INPLACE set the ciphertext is first copied to OUTBUF and then
    decrypted there.  Returns 0 on success or an error code;
    GPG_ERR_INTERNAL means that OUTBUF is too small.  */
 static gcry_error_t
 check_one_unwrap (gcry_cipher_hd_t hd,
                   unsigned char *outbuf, size_t outbufsize,
                   const void *expected, size_t expectedlen,
                   int inplace, size_t *r_outlen)
 {
   *r_outlen = expectedlen - 8;
   if (*r_outlen > outbufsize)
     return gpg_error (GPG_ERR_INTERNAL);
   if (!inplace)
     return gcry_cipher_decrypt (hd, outbuf, *r_outlen, expected, expectedlen);
 
   /* The in-place variant stages the entire ciphertext in OUTBUF, thus
      the ciphertext itself has to fit and not only the plaintext.  */
   if (expectedlen > outbufsize)
     return gpg_error (GPG_ERR_INTERNAL);
   memcpy (outbuf, expected, expectedlen);
   return gcry_cipher_decrypt (hd, outbuf, *r_outlen, outbuf, expectedlen);
 }
 
 
 /* Check one AESWRAP test vector with the cipher ALGO: encrypt DATA
    with the key encryption key KEK and compare the result with
    EXPECTED, then decrypt EXPECTED three times (directly, after a
    gcry_cipher_reset and once more without a reset) and compare each
    result with DATA.  With INPLACE set the same buffer is used for
    input and output.  Failures are reported through fail(); the cipher
    handle is closed on all paths once it has been opened.  */
 static void
 check_one (int algo,
            const void *kek, size_t keklen,
            const void *data, size_t datalen,
            const void *expected, size_t expectedlen,
            int inplace)
 {
   gcry_error_t err;
   gcry_cipher_hd_t hd;
   unsigned char outbuf[32+8];
   size_t outbuflen;
 
   err = gcry_cipher_open (&hd, algo, GCRY_CIPHER_MODE_AESWRAP, 0);
   if (err)
     {
       fail ("gcry_cipher_open failed: %s\n", gpg_strerror (err));
       return;
     }
 
   err = gcry_cipher_setkey (hd, kek, keklen);
   if (err)
     {
       fail ("gcry_cipher_setkey failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   /* Encryption; the output length passed is DATALEN plus 8.  */
   outbuflen = datalen + 8;
   if (outbuflen > sizeof outbuf)
     err = gpg_error (GPG_ERR_INTERNAL);
   else if (inplace)
     {
       memcpy (outbuf, data, datalen);
       err = gcry_cipher_encrypt (hd, outbuf, outbuflen, outbuf, datalen);
     }
   else
     err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen);
 
   if (err)
     {
       fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != expectedlen || memcmp (outbuf, expected, expectedlen))
     {
       fail ("mismatch at encryption!%s\n", inplace ? " (inplace)" : "");
       check_one_dump ("computed: ", outbuf, outbuflen);
       check_one_dump ("\nexpected: ", expected, expectedlen);
       putc ('\n', stderr);
     }
 
   /* First decryption.  */
   err = check_one_unwrap (hd, outbuf, sizeof outbuf,
                           expected, expectedlen, inplace, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     {
       fail ("mismatch at decryption!%s\n", inplace ? " (inplace)" : "");
       check_one_dump ("computed: ", outbuf, outbuflen);
       check_one_dump ("\nexpected: ", data, datalen);
       putc ('\n', stderr);
     }
 
   /* Now the last step again with a key reset. */
   gcry_cipher_reset (hd);
 
   err = check_one_unwrap (hd, outbuf, sizeof outbuf,
                           expected, expectedlen, inplace, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     fail ("mismatch at decryption(2)!%s\n", inplace ? " (inplace)" : "");
 
   /* And once more without a key reset. */
   err = check_one_unwrap (hd, outbuf, sizeof outbuf,
                           expected, expectedlen, inplace, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     fail ("mismatch at decryption(3)!%s\n", inplace ? " (inplace)" : "");
 
  leave:
   gcry_cipher_close (hd);
 }
 
 
 /* Run check_one on the given test vector twice: first with separate
    input and output buffers (inplace = 0), then with a shared buffer
    (inplace = 1).  */
 static void
 check (int algo,
        const void *kek, size_t keklen,
        const void *data, size_t datalen,
        const void *expected, size_t expectedlen)
 {
   int inplace;
 
   for (inplace = 0; inplace <= 1; inplace++)
     check_one (algo, kek, keklen, data, datalen, expected, expectedlen,
                inplace);
 }
 
 
 /* Print LABEL followed by the LEN bytes at BUF as hex to stderr.  */
 static void
 padding_dump (const char *label, const void *buf, size_t len)
 {
   const unsigned char *s = buf;
   size_t i;
 
   fputs (label, stderr);
   for (i = 0; i < len; i++)
     fprintf (stderr, "%02x ", s[i]);
 }
 
 
 /* Run one decryption of the EXPECTEDLEN bytes at EXPECTED into OUTBUF,
    which has room for OUTBUFSIZE bytes.  The output length passed to
    the decryption is DATALEN rounded up to a multiple of 8, plus 8.
    On success the four bytes returned by gcry_cipher_info for
    GCRYCTL_GET_KEYLEN are read as a big-endian number and stored at
    *R_OUTLEN.  Returns 0 or an error code; GPG_ERR_INTERNAL means that
    OUTBUF is too small.  If an error is returned *R_OUTLEN holds the
    padded length.  */
 static gcry_error_t
 padding_unwrap (gcry_cipher_hd_t hd,
                 unsigned char *outbuf, size_t outbufsize, size_t datalen,
                 const void *expected, size_t expectedlen,
                 size_t *r_outlen)
 {
   gcry_error_t err;
   unsigned char plen[4];
   size_t nbytes;
 
   *r_outlen = ((datalen+7)/8) * 8 + 8;
   if (*r_outlen > outbufsize)
     return gpg_error (GPG_ERR_INTERNAL);
 
   err = gcry_cipher_decrypt (hd, outbuf, *r_outlen, expected, expectedlen);
   if (err)
     return err;
 
   err = gcry_cipher_info (hd, GCRYCTL_GET_KEYLEN, plen, &nbytes);
   if (err)
     return err;
 
   /* Widen before shifting: plen[0] is promoted to int and shifting a
      value >= 0x80 by 24 bits would overflow into the sign bit.  */
   *r_outlen = (((size_t)plen[0] << 24) | ((size_t)plen[1] << 16)
                | ((size_t)plen[2] << 8) | (size_t)plen[3]);
   return 0;
 }
 
 
 /* Check one AESWRAP test vector with the cipher ALGO opened with the
    GCRY_CIPHER_EXTENDED flag: encrypt DATA with the key encryption key
    KEK and compare the result with EXPECTED, then decrypt EXPECTED
    three times (directly, after a gcry_cipher_reset and once more
    without a reset) and compare each result with DATA.  Failures are
    reported through fail().  */
 static void
 check_one_with_padding (int algo,
                         const void *kek, size_t keklen,
                         const void *data, size_t datalen,
                         const void *expected, size_t expectedlen)
 {
   gcry_error_t err;
   gcry_cipher_hd_t hd;
   unsigned char outbuf[4*16];
   size_t outbuflen;
 
   err = gcry_cipher_open (&hd, algo, GCRY_CIPHER_MODE_AESWRAP,
                           GCRY_CIPHER_EXTENDED);
   if (err)
     {
       fail ("gcry_cipher_open failed: %s\n", gpg_strerror (err));
       return;
     }
 
   err = gcry_cipher_setkey (hd, kek, keklen);
   if (err)
     {
       fail ("gcry_cipher_setkey failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   /* Encryption; the output length is DATALEN rounded up to a multiple
      of 8, plus 8.  */
   outbuflen = ((datalen+7)/8) * 8 + 8;
   if (outbuflen > sizeof outbuf)
     err = gpg_error (GPG_ERR_INTERNAL);
   else
     err = gcry_cipher_encrypt (hd, outbuf, outbuflen, data, datalen);
 
   if (err)
     {
       fail ("gcry_cipher_encrypt failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != expectedlen || memcmp (outbuf, expected, expectedlen))
     {
       fail ("mismatch at encryption!(padding)\n");
       padding_dump ("computed: ", outbuf, outbuflen);
       padding_dump ("\nexpected: ", expected, expectedlen);
       putc ('\n', stderr);
     }
 
   /* First decryption.  */
   err = padding_unwrap (hd, outbuf, sizeof outbuf, datalen,
                         expected, expectedlen, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     {
       fail ("mismatch at decryption!(padding)\n");
       padding_dump ("computed: ", outbuf, outbuflen);
       padding_dump ("\nexpected: ", data, datalen);
       putc ('\n', stderr);
     }
 
   /* Now the last step again with a key reset. */
   gcry_cipher_reset (hd);
 
   err = padding_unwrap (hd, outbuf, sizeof outbuf, datalen,
                         expected, expectedlen, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt(2) failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     fail ("mismatch at decryption(2)(padding)!\n");
 
   /* And once more without a key reset. */
   err = padding_unwrap (hd, outbuf, sizeof outbuf, datalen,
                         expected, expectedlen, &outbuflen);
   if (err)
     {
       fail ("gcry_cipher_decrypt(3) failed: %s\n", gpg_strerror (err));
       goto leave;
     }
 
   if (outbuflen != datalen || memcmp (outbuf, data, datalen))
     fail ("mismatch at decryption(3)(padding)!\n");
 
  leave:
   gcry_cipher_close (hd);
 }
 
 
 /* Run all AES key wrap known-answer tests.
  *
  * Each test hands a key-encryption key (KEK), the key data to wrap
  * and the expected wrapped result, each with its length in bytes, to
  * check() or, for the padded variants, to check_one_with_padding().
  * The section numbers printed in verbose mode appear to refer to the
  * test vector sections of RFC 3394 (4.x) and RFC 5649 (6).
  *
  * The cipher algorithm must match the KEK length: 16 bytes for
  * GCRY_CIPHER_AES128, 24 bytes for GCRY_CIPHER_AES192 and 32 bytes
  * for GCRY_CIPHER_AES256.  */
 static void
 check_all (void)
 {
   if (verbose)
     fprintf (stderr, "4.1 Wrap 128 bits of Key Data with a 128-bit KEK\n");
   check
     (GCRY_CIPHER_AES128,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", 16,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF", 16,
      "\x1F\xA6\x8B\x0A\x81\x12\xB4\x47\xAE\xF3\x4B\xD8\xFB\x5A\x7B\x82"
      "\x9D\x3E\x86\x23\x71\xD2\xCF\xE5", 24);
 
   if (verbose)
     fprintf (stderr, "4.2 Wrap 128 bits of Key Data with a 192-bit KEK\n");
   check
     (GCRY_CIPHER_AES192,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
      "\x10\x11\x12\x13\x14\x15\x16\x17", 24,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF", 16,
      "\x96\x77\x8B\x25\xAE\x6C\xA4\x35\xF9\x2B\x5B\x97\xC0\x50\xAE\xD2"
      "\x46\x8A\xB8\xA1\x7A\xD8\x4E\x5D", 24);
 
   if (verbose)
     fprintf (stderr, "4.3 Wrap 128 bits of Key Data with a 256-bit KEK\n");
   check
     (GCRY_CIPHER_AES256,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
      "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", 32,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF", 16,
      "\x64\xE8\xC3\xF9\xCE\x0F\x5B\xA2\x63\xE9\x77\x79\x05\x81\x8A\x2A"
      "\x93\xC8\x19\x1E\x7D\x6E\x8A\xE7", 24);
 
   if (verbose)
     fprintf (stderr, "4.4 Wrap 192 bits of Key Data with a 192-bit KEK\n");
   check
     (GCRY_CIPHER_AES192,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
      "\x10\x11\x12\x13\x14\x15\x16\x17", 24,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF"
      "\x00\x01\x02\x03\x04\x05\x06\x07", 24,
      "\x03\x1D\x33\x26\x4E\x15\xD3\x32\x68\xF2\x4E\xC2\x60\x74\x3E\xDC"
      "\xE1\xC6\xC7\xDD\xEE\x72\x5A\x93\x6B\xA8\x14\x91\x5C\x67\x62\xD2", 32);
 
   if (verbose)
     fprintf (stderr, "4.5 Wrap 192 bits of Key Data with a 256-bit KEK\n");
   check
     (GCRY_CIPHER_AES256,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
      "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", 32,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF"
      "\x00\x01\x02\x03\x04\x05\x06\x07", 24,
      "\xA8\xF9\xBC\x16\x12\xC6\x8B\x3F\xF6\xE6\xF4\xFB\xE3\x0E\x71\xE4"
      "\x76\x9C\x8B\x80\xA3\x2C\xB8\x95\x8C\xD5\xD1\x7D\x6B\x25\x4D\xA1", 32);
 
   if (verbose)
     fprintf (stderr, "4.6 Wrap 256 bits of Key Data with a 256-bit KEK\n");
   /* A 256-bit (32 byte) KEK requires the AES-256 cipher; the plain
    * GCRY_CIPHER_AES id selects AES-128 and does not fit this key.  */
   check
     (GCRY_CIPHER_AES256,
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
      "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", 32,
      "\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF"
      "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", 32,
      "\x28\xC9\xF4\x04\xC4\xB8\x10\xF4\xCB\xCC\xB3\x5C\xFB\x87\xF8\x26"
      "\x3F\x57\x86\xE2\xD8\x0E\xD3\x26\xCB\xC7\xF0\xE7\x1A\x99\xF4\x3B"
      "\xFB\x98\x8B\x9B\x7A\x02\xDD\x21", 40);
 
   /* Key data whose length is not a multiple of 8 bytes goes through
    * the padding variant of the test.  */
   if (verbose)
     fprintf (stderr, "6 Wrap 160 bits of Key Data with a 192-bit KEK\n");
   check_one_with_padding
     (GCRY_CIPHER_AES192,
      "\x58\x40\xdf\x6e\x29\xb0\x2a\xf1\xab\x49\x3b\x70\x5b\xf1\x6e\xa1"
      "\xae\x83\x38\xf4\xdc\xc1\x76\xa8", 24,
      "\xc3\x7b\x7e\x64\x92\x58\x43\x40\xbe\xd1\x22\x07\x80\x89\x41\x15"
      "\x50\x68\xf7\x38", 20,
      "\x13\x8b\xde\xaa\x9b\x8f\xa7\xfc\x61\xf9\x77\x42\xe7\x22\x48\xee"
      "\x5a\xe6\xae\x53\x60\xd1\xae\x6a\x5f\x54\xf3\x73\xfa\x54\x3b\x6a", 32);
 
   if (verbose)
     fprintf (stderr, "6 Wrap 56 bits of Key Data with a 192-bit KEK\n");
   check_one_with_padding
     (GCRY_CIPHER_AES192,
      "\x58\x40\xdf\x6e\x29\xb0\x2a\xf1\xab\x49\x3b\x70\x5b\xf1\x6e\xa1"
      "\xae\x83\x38\xf4\xdc\xc1\x76\xa8", 24,
      "\x46\x6f\x72\x50\x61\x73\x69", 7,
      "\xaf\xbe\xb0\xf0\x7d\xfb\xf5\x41\x92\x00\xf2\xcc\xb5\x0b\xb2\x4f", 16);
 }
 
 /* Entry point of the key wrap test.  Only the first command line
  * argument is examined: "--verbose" enables progress output and
  * "--debug" additionally enables Libgcrypt debug flags.  The exit
  * status is 1 if any check reported an error and 0 otherwise.  */
 int
 main (int argc, char **argv)
 {
   if (argc > 1)
     {
       if (!strcmp (argv[1], "--verbose"))
         verbose = 1;
       else if (!strcmp (argv[1], "--debug"))
         {
           debug = 1;
           verbose = 1;
         }
     }
 
   /* Refuse to run against a library that does not match the headers.  */
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   check_all ();
 
   if (error_count)
     return 1;
   return 0;
 }
diff --git a/tests/curves.c b/tests/curves.c
index 3c738171..27d43a74 100644
--- a/tests/curves.c
+++ b/tests/curves.c
@@ -1,353 +1,353 @@
 /* curves.c -  ECC curves regression tests
  *	Copyright (C) 2011 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 
 #include "../src/gcrypt-int.h"
 
 
 #define PGM "curves"
 #include "t-common.h"
 
 /* Number of curves defined in ../cipher/ecc-curves.c */
 #define N_CURVES 27
 
 /* A real world sample public key.
  *
  * The key is an S-expression that spells out the curve parameters
  * (p, a, b, g, n, h) and the public point q instead of naming a curve.
  * check_matching() parses it and expects gcry_pk_get_curve() to
  * identify it as SAMPLE_KEY_1_CURVE with SAMPLE_KEY_1_NBITS bits.  */
 static char const sample_key_1[] =
 "(public-key\n"
 " (ecdsa\n"
 "  (p #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF#)\n"
 "  (a #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC#)\n"
 "  (b #5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B#)\n"
 "  (g #046B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296"
         "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5#)\n"
 "  (n #00FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551#)\n"
 "  (h #000000000000000000000000000000000000000000000000000000000000000001#)\n"
 "  (q #0442B927242237639A36CE9221B340DB1A9AB76DF2FE3E171277F6A4023DED146EE"
       "86525E38CCECFF3FB8D152CC6334F70D23A525175C1BCBDDE6E023B2228770E#)\n"
 "  ))";
 /* Curve name and size the tests expect for sample_key_1.  */
 static char const sample_key_1_curve[] = "NIST P-256";
 static unsigned int sample_key_1_nbits = 256;
 
 /* A made up sample public key.
  *
  * Same layout as sample_key_1 but tagged "ecdh".  The tests in this
  * file use it only when FIPS mode is not active and expect it to be
  * identified as SAMPLE_KEY_2_CURVE with SAMPLE_KEY_2_NBITS bits.  */
 static char const sample_key_2[] =
 "(public-key\n"
 " (ecdh\n"
 "  (p #00e95e4a5f737059dc60dfc7ad95b3d8139515620f#)\n"
 "  (a #340e7be2a280eb74e2be61bada745d97e8f7c300#)\n"
 "  (b #1e589a8595423412134faa2dbdec95c8d8675e58#)\n"
 "  (g #04bed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3"
         "1667cb477a1a8ec338f94741669c976316da6321#)\n"
 "  (n #00e95e4a5f737059dc60df5991d45029409e60fc09#)\n"
 "  (h #000000000000000000000000000000000000000000000000000000000000000001#)\n"
 "  (q #041111111111111111111111111111111111111111"
         "2222222222222222222222222222222222222222#)\n"
 "  ))";
 /* Curve name and size the tests expect for sample_key_2.  */
 static char const sample_key_2_curve[] = "brainpoolP160r1";
 static unsigned int sample_key_2_nbits = 160;
 
 /* Set to 1 by main() when gcry_fips_mode_active() reports that FIPS
  * mode is active; tests use it to skip or invert checks.  */
 static int in_fips_mode;
 
 /* Walk the list of built-in curves by index, print each one in
  * verbose mode and check that exactly N_CURVES entries are returned.
  * Also checks that a negative index yields no curve.  */
 static void
 list_curves (void)
 {
   const char *curve;
   unsigned int bits;
   int count = 0;
 
   /* With a NULL key the second argument acts as the iterator index;
    * a NULL return value ends the enumeration.  */
   while ((curve = gcry_pk_get_curve (NULL, count, &bits)))
     {
       if (verbose)
         printf ("%s - %u bits\n", curve, bits);
       count++;
     }
 
   if (count != N_CURVES)
     fail ("expected %d curves but got %d\n", N_CURVES, count);
 
   if (gcry_pk_get_curve (NULL, -1, NULL))
     fail ("curve iteration failed\n");
 }
 
 
 /* Check that gcry_pk_get_curve() maps the explicit curve parameters
  * of the sample keys back to the expected curve name and bit size.
  * The second sample key is only examined when FIPS mode is not
  * active.  A sample key that cannot be parsed aborts the test.  */
 static void
 check_matching (void)
 {
   gpg_error_t rc;
   gcry_sexp_t sexp;
   const char *curve;
   unsigned int bits;
 
   /* First sample key.  */
   rc = gcry_sexp_new (&sexp, sample_key_1, 0, 1);
   if (rc)
     die ("parsing s-expression string failed: %s\n", gpg_strerror (rc));
 
   curve = gcry_pk_get_curve (sexp, 0, &bits);
   if (curve)
     {
       if (strcmp (curve, sample_key_1_curve))
         fail ("expected curve name %s but got %s for sample_key_1\n",
               sample_key_1_curve, curve);
       else if (bits != sample_key_1_nbits)
         fail ("expected curve size %u but got %u for sample_key_1\n",
               sample_key_1_nbits, bits);
     }
   else
     fail ("curve name not found for sample_key_1\n");
 
   gcry_sexp_release (sexp);
 
   /* The second sample key is not tested in FIPS mode.  */
   if (in_fips_mode)
     return;
 
   rc = gcry_sexp_new (&sexp, sample_key_2, 0, 1);
   if (rc)
     die ("parsing s-expression string failed: %s\n", gpg_strerror (rc));
 
   curve = gcry_pk_get_curve (sexp, 0, &bits);
   if (curve)
     {
       if (strcmp (curve, sample_key_2_curve))
         fail ("expected curve name %s but got %s for sample_key_2\n",
               sample_key_2_curve, curve);
       else if (bits != sample_key_2_nbits)
         fail ("expected curve size %u but got %u for sample_key_2\n",
               sample_key_2_nbits, bits);
     }
   else
     fail ("curve name not found for sample_key_2\n");
 
   gcry_sexp_release (sexp);
 }
 
 #define TEST_ERROR_EXPECTED (1 << 0)
 #define TEST_NOFIPS         (1 << 1)
 
 static void
 check_get_params (void)
 {
   static struct {
     int algo;
     const char *name;
     int flags;
   } tv[] =
       {
        { GCRY_PK_ECC, "Ed25519", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.6.1.4.1.11591.15.1", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.101.112", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "Curve25519", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.6.1.4.1.3029.1.5.1", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.101.110", TEST_NOFIPS },
        { GCRY_PK_ECC, "X25519", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "Ed448", TEST_NOFIPS },
        { GCRY_PK_ECC, "X448", TEST_NOFIPS  },
        { GCRY_PK_ECC, "1.3.101.113", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.101.111", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "NIST P-192", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.840.10045.3.1.1", TEST_NOFIPS },
        { GCRY_PK_ECC, "prime192v1", TEST_NOFIPS },
        { GCRY_PK_ECC, "secp192r1", TEST_NOFIPS },
        { GCRY_PK_ECC, "nistp192", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "NIST P-224" },
        { GCRY_PK_ECC, "secp224r1"  },
        { GCRY_PK_ECC, "1.3.132.0.33" },
        { GCRY_PK_ECC, "nistp224"   },
 
        { GCRY_PK_ECC, "NIST P-256" },
        { GCRY_PK_ECC, "1.2.840.10045.3.1.7" },
        { GCRY_PK_ECC, "prime256v1" },
        { GCRY_PK_ECC, "secp256r1" },
        { GCRY_PK_ECC, "nistp256"  },
 
        { GCRY_PK_ECC, "NIST P-384" },
        { GCRY_PK_ECC, "secp384r1" },
        { GCRY_PK_ECC, "1.3.132.0.34" },
        { GCRY_PK_ECC, "nistp384"   },
 
        { GCRY_PK_ECC, "NIST P-521" },
        { GCRY_PK_ECC, "secp521r1" },
        { GCRY_PK_ECC, "1.3.132.0.35" },
        { GCRY_PK_ECC, "nistp521"   },
 
        { GCRY_PK_ECC, "brainpoolP160r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.1",  TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP192r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.3",  TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP224r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.5",  TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP256r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.7",  TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP320r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.9",  TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP384r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.11", TEST_NOFIPS },
        { GCRY_PK_ECC, "brainpoolP512r1",       TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.36.3.3.2.8.1.1.13", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "GOST2001-test", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.35.0", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.35.1", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.35.2", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.35.3", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-XchA", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-XchB", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.36.0", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.2.2.36.1", TEST_NOFIPS },
 
        /* Noet that GOST2012-256-tc26-A" is only in the curve alias
         * list but has no parameter entry.  */
        { GCRY_PK_ECC, "GOST2001-CryptoPro-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.1.2", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-256-tc26-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.1.3", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-256-tc26-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.1.4", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2001-CryptoPro-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-256-tc26-D", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "GOST2012-512-test", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-test", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-test", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.2.0", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-tc26-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-tc26-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-tc26-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-tc26-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-tc26-A", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.2.1", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-tc26-B", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.2.2", TEST_NOFIPS },
        { GCRY_PK_ECC, "GOST2012-512-tc26-C", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.643.7.1.2.1.2.3", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "secp256k1", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.3.132.0.10", TEST_NOFIPS },
 
        { GCRY_PK_ECC, "sm2p256v1", TEST_NOFIPS },
        { GCRY_PK_ECC, "1.2.156.10197.1.301", TEST_NOFIPS },
 
        /* Check also the ECC algo mapping.  */
        { GCRY_PK_ECDSA, "Ed25519", TEST_NOFIPS },
        { GCRY_PK_EDDSA, "Ed25519", TEST_NOFIPS },
        { GCRY_PK_ECDH,  "Ed25519", TEST_NOFIPS },
        { GCRY_PK_ECDSA, "Curve25519", TEST_NOFIPS },
        { GCRY_PK_EDDSA, "Curve25519", TEST_NOFIPS },
        { GCRY_PK_ECDH,  "Curve25519", TEST_NOFIPS },
        { GCRY_PK_ECC,   "NoSuchCurve", TEST_ERROR_EXPECTED },
        { GCRY_PK_RSA,   "rsa", TEST_ERROR_EXPECTED },
        { GCRY_PK_ELG,   "elg", TEST_ERROR_EXPECTED },
        { GCRY_PK_DSA,   "dsa", TEST_ERROR_EXPECTED }
       };
   int idx;
   gcry_sexp_t param;
   const char *name;
 
   param = gcry_pk_get_param (GCRY_PK_ECDSA, sample_key_1_curve);
   if (!param)
     fail ("error gerring parameters for `%s'\n", sample_key_1_curve);
 
   name = gcry_pk_get_curve (param, 0, NULL);
   if (!name)
     fail ("get_param: curve name not found for sample_key_1\n");
   else if (strcmp (name, sample_key_1_curve))
     fail ("get_param: expected curve name %s but got %s for sample_key_1\n",
           sample_key_1_curve, name);
 
   gcry_sexp_release (param);
 
   if (!in_fips_mode)
     {
       param = gcry_pk_get_param (GCRY_PK_ECDSA, sample_key_2_curve);
       if (!param)
         fail ("error gerring parameters for `%s'\n", sample_key_2_curve);
 
       name = gcry_pk_get_curve (param, 0, NULL);
       if (!name)
         fail ("get_param: curve name not found for sample_key_2\n");
       else if (strcmp (name, sample_key_2_curve))
         fail ("get_param: expected curve name %s but got %s for sample_key_2\n",
               sample_key_2_curve, name);
 
       gcry_sexp_release (param);
     }
 
   /* Some simple tests */
   for (idx=0; idx < DIM (tv); idx++)
     {
       param = gcry_pk_get_param (tv[idx].algo, tv[idx].name);
       if (in_fips_mode && tv[idx].flags & TEST_NOFIPS)
         {
           if (param)
             fail ("get_param: test %d (%s) should have failed in fips mode\n",
                   idx, tv[idx].name);
         }
       else {
         if (!param)
           {
             if (!(tv[idx].flags & TEST_ERROR_EXPECTED))
               fail ("get_param: test %d (%s) failed\n", idx, tv[idx].name);
           }
         else
           {
             if (tv[idx].flags & TEST_ERROR_EXPECTED)
               fail ("get_param: test %d (%s) failed (error expected)\n",
                     idx, tv[idx].name);
           }
         }
       gcry_sexp_release (param);
     }
 }
 
 
 /* Entry point of the curves test.  Only the first command line
  * argument is examined: "--verbose" enables progress output and
  * "--debug" additionally enables Libgcrypt debug flags.  The exit
  * status is 1 if any check reported an error and 0 otherwise.  */
 int
 main (int argc, char **argv)
 {
   if (argc > 1)
     {
       if (!strcmp (argv[1], "--verbose"))
         verbose = 1;
       else if (!strcmp (argv[1], "--debug"))
         {
           debug = 1;
           verbose = 1;
         }
     }
 
   /* Refuse to run against a library that does not match the headers.  */
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   /* Remember the FIPS state; several checks depend on it.  */
   if (gcry_fips_mode_active ())
     in_fips_mode = 1;
 
   list_curves ();
   check_matching ();
   check_get_params ();
 
   if (error_count)
     return 1;
   return 0;
 }
diff --git a/tests/hmac.c b/tests/hmac.c
index 2b4c0f9f..9164d3a8 100644
--- a/tests/hmac.c
+++ b/tests/hmac.c
@@ -1,203 +1,203 @@
 /* hmac.c -  HMAC regression tests
  *	Copyright (C) 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 
 #define PGM "hmac"
 #include "t-common.h"
 
 
 /* Compute the HMAC of DATA (DATALEN bytes) with digest algorithm ALGO
  * and KEY (KEYLEN bytes) and compare it with EXPECT.  EXPECT must
  * provide at least as many bytes as the digest length of ALGO.  A
  * mismatch prints both values to stdout and is recorded with fail();
  * so is any error from the message digest API.  The digest handle is
  * closed on every path once it has been opened.  */
 static void
 check_one_mac (int algo,
                const void *key, size_t keylen,
                const void *data, size_t datalen,
                const char *expect)
 {
   gcry_md_hd_t hd;
   unsigned char *p;
   int mdlen;
   int i;
   gcry_error_t err = 0;
 
   err = gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC);
   if (err)
     {
       /* Nothing was opened, so there is nothing to close.  */
       fail ("algo %d, gcry_md_open failed: %s\n", algo, gpg_strerror (err));
       return;
     }
 
   mdlen = gcry_md_get_algo_dlen (algo);
   if (mdlen < 1 || mdlen > 500)
     {
       fail ("algo %d, gcry_md_get_algo_dlen failed: %d\n", algo, mdlen);
       goto leave;
     }
 
   err = gcry_md_setkey (hd, key, keylen);
   if (err)
     {
       fail ("algo %d, gcry_md_setkey failed: %s\n", algo, gpg_strerror (err));
       goto leave;
     }
 
   gcry_md_write (hd, data, datalen);
 
   /* The returned buffer is only used before HD is closed below.  */
   p = gcry_md_read (hd, 0);
   if (!p)
     {
       fail ("algo %d, gcry_md_read failed\n", algo);
       goto leave;
     }
 
   if (memcmp (p, expect, mdlen))
     {
       printf ("computed: ");
       for (i = 0; i < mdlen; i++)
         printf ("%02x ", p[i] & 0xFF);
       printf ("\nexpected: ");
       for (i = 0; i < mdlen; i++)
         printf ("%02x ", expect[i] & 0xFF);
       printf ("\n");
 
       fail ("algo %d, MAC does not match\n", algo);
     }
 
  leave:
   gcry_md_close (hd);
 }
 
 /* Run the four HMAC-SHA1 samples labelled "FIPS-198a, A.1" to "A.4".
  * Each sample uses a key made of consecutive byte values starting at
  * a sample specific value, a nine byte message "Sample #N" and the
  * expected 20 byte MAC.  */
 static void
 check_hmac (void)
 {
   unsigned char key[128];
   int n;
 
   if (verbose)
     fprintf (stderr, "checking FIPS-198a, A.1\n");
   /* 64 byte key: 0x00, 0x01, ...  */
   for (n = 0; n < 64; n++)
     key[n] = n;
   check_one_mac (GCRY_MD_SHA1, key, 64, "Sample #1", 9,
                  "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12"
                  "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a");
 
   if (verbose)
     fprintf (stderr, "checking FIPS-198a, A.2\n");
   /* 20 byte key: 0x30, 0x31, ...  */
   for (n = 0; n < 20; n++)
     key[n] = 0x30 + n;
   check_one_mac (GCRY_MD_SHA1, key, 20, "Sample #2", 9,
                  "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82"
                  "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24");
 
   if (verbose)
     fprintf (stderr, "checking FIPS-198a, A.3\n");
   /* 100 byte key: 0x50, 0x51, ...  */
   for (n = 0; n < 100; n++)
     key[n] = 0x50 + n;
   check_one_mac (GCRY_MD_SHA1, key, 100, "Sample #3", 9,
                  "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0"
                  "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa");
 
   if (verbose)
     fprintf (stderr, "checking FIPS-198a, A.4\n");
   /* 49 byte key: 0x70, 0x71, ...  */
   for (n = 0; n < 49; n++)
     key[n] = 0x70 + n;
   check_one_mac (GCRY_MD_SHA1, key, 49, "Sample #4", 9,
                  "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42"
                  "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26");
 }
 
 
 /* Check gcry_md_hash_buffers() in HMAC mode.  The first buffer
  * carries the 64 byte key; the message "Sample #1" follows split
  * into three pieces which all point into the same string and differ
  * only in offset and length.  The result must equal the known
  * HMAC-SHA1 value; otherwise both values are printed to stdout and
  * the failure is recorded.  */
 static void
 check_hmac_multi (void)
 {
   /* Offset and length of each message piece.  */
   static const struct { size_t off; size_t len; } piece[3] =
     {
       { 0, 3 },
       { 3, 1 },
       { 4, 5 }
     };
   const char msg[] = "Sample #1";
   const char mac[] = ("\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12"
                       "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a");
   unsigned char key[128];
   gcry_buffer_t iov[4];
   char digest[64];
   gpg_error_t rc;
   int algo = GCRY_MD_SHA1;
   int maclen;
   int n;
 
   if (verbose)
     fprintf (stderr, "checking HMAC using multiple buffers\n");
 
   for (n = 0; n < 64; n++)
     key[n] = n;
 
   memset (iov, 0, sizeof iov);
   iov[0].data = key;
   iov[0].len = 64;
   for (n = 0; n < 3; n++)
     {
       iov[n + 1].data = (void*)msg;
       iov[n + 1].off = piece[n].off;
       iov[n + 1].len = piece[n].len;
     }
 
   maclen = gcry_md_get_algo_dlen (algo);
   rc = gcry_md_hash_buffers (algo, GCRY_MD_FLAG_HMAC, digest, iov, 4);
   if (rc)
     {
       fail ("gcry_md_hash_buffers failed for algo %d: %s\n",
             algo, gpg_strerror (rc));
       return;
     }
 
   if (!memcmp (digest, mac, maclen))
     return;  /* MAC is as expected.  */
 
   printf ("computed: ");
   for (n = 0; n < maclen; n++)
     printf ("%02x ", digest[n] & 0xFF);
   printf ("\nexpected: ");
   for (n = 0; n < maclen; n++)
     printf ("%02x ", mac[n] & 0xFF);
   printf ("\n");
 
   fail ("gcry_md_hash_buffers, algo %d, MAC does not match\n", algo);
 }
 
 
 /* Entry point of the HMAC test.  Only the first command line
  * argument is examined: "--verbose" enables progress output and
  * "--debug" additionally enables Libgcrypt debug flags.  The exit
  * status is 1 if any check reported an error and 0 otherwise.  */
 int
 main (int argc, char **argv)
 {
   if (argc > 1)
     {
       if (!strcmp (argv[1], "--verbose"))
         verbose = 1;
       else if (!strcmp (argv[1], "--debug"))
         {
           debug = 1;
           verbose = 1;
         }
     }
 
   /* Refuse to run against a library that does not match the headers.  */
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   check_hmac ();
   check_hmac_multi ();
 
   if (error_count)
     return 1;
   return 0;
 }
diff --git a/tests/keygrip.c b/tests/keygrip.c
index 49bd71bc..114b6f04 100644
--- a/tests/keygrip.c
+++ b/tests/keygrip.c
@@ -1,404 +1,404 @@
 /* keygrip.c - verifies that keygrips are calculated as expected
  *	Copyright (C) 2005 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>
 
 #define PGM "keygrip"
 #include "t-common.h"
 
 static int repetitions;
 
 /* Whether fips mode was active at startup.  */
 static int in_fips_mode;
 
 
 
 /* Print TEXT to stdout followed by the N bytes at BUF as uppercase
    hex digits and a terminating newline.  */
 static void
 print_hex (const char *text, const void *buf, size_t n)
 {
   const unsigned char *bytes = buf;
   size_t idx;
 
   fputs (text, stdout);
   for (idx = 0; idx < n; idx++)
     printf ("%02X", bytes[idx]);
   putchar ('\n');
 }
 
 
 
 
 /* Table of test vectors: each entry pairs a key given as an
    S-expression string with the 20 byte keygrip that check() expects
    gcry_pk_get_keygrip to return for it.  */
 static struct
 {
   int algo;                     /* Public key algorithm id; the entry is
                                    skipped if gcry_pk_test_algo rejects it. */
   const char *key;              /* The key as S-expression text.  */
   const unsigned char grip[20]; /* The expected keygrip.  */
   int skip_when_fips;           /* If set, skip the entry in FIPS mode.  */
 } key_grips[] =
   {
     {
       GCRY_PK_RSA,
       "(private-key"
       " (rsa"
       "  (n #00B6B509596A9ECABC939212F891E656A626BA07DA8521A9CAD4C08E640C04052FBB87F424EF1A0275A48A9299AC9DB69ABE3D0124E6C756B1F7DFB9B842D6251AEA6EE85390495CADA73D671537FCE5850A932F32BAB60AB1AC1F852C1F83C625E7A7D70CDA9EF16D5C8E47739D77DF59261ABE8454807FF441E143FBD37F8545#)"
       "  (e #010001#)"
       "  (d #077AD3DE284245F4806A1B82B79E616FBDE821C82D691A65665E57B5FAD3F34E67F401E7BD2E28699E89D9C496CF821945AE83AC7A1231176A196BA6027E77D85789055D50404A7A2A95B1512F91F190BBAEF730ED550D227D512F89C0CDB31AC06FA9A19503DDF6B66D0B42B9691BFD6140EC1720FFC48AE00C34796DC899E5#)"
       "  (p #00D586C78E5F1B4BF2E7CD7A04CA091911706F19788B93E44EE20AAF462E8363E98A72253ED845CCBF2481BB351E8557C85BCFFF0DABDBFF8E26A79A0938096F27#)"
       "  (q #00DB0CDF60F26F2A296C88D6BF9F8E5BE45C0DDD713C96CC73EBCB48B061740943F21D2A93D6E42A7211E7F02A95DCED6C390A67AD21ECF739AE8A0CA46FF2EBB3#)"
       "  (u #33149195F16912DB20A48D020DBC3B9E3881B39D722BF79378F6340F43148A6E9FC5F53E2853B7387BA4443BA53A52FCA8173DE6E85B42F9783D4A7817D0680B#)))",
       "\x32\xCF\xFA\x85\xB1\x79\x1F\xBB\x26\x14\xE9\x1A\xFD\xF3\xAF\xE3\x32\x08\x2E\x25"
     },
     {
       GCRY_PK_DSA,
       " (public-key"
       " (dsa"
       "  (p #0084E4C626E16005770BD9509ABF7354492E85B8C0060EFAAAEC617F725B592FAA59DF5460575F41022776A9718CE62EDD542AB73C7720869EBDBC834D174ADCD7136827DF51E2613545A25CA573BC502A61B809000B6E35F5EB7FD6F18C35678C23EA1C3638FB9CFDBA2800EE1B62F41A4479DE824F2834666FBF8DC5B53C2617#)"
       "  (q #00B0E6F710051002A9F425D98A677B18E0E5B038AB#)"
       "  (g #44370CEE0FE8609994183DBFEBA7EEA97D466838BCF65EFF506E35616DA93FA4E572A2F08886B74977BC00CA8CD3DBEA7AEB7DB8CBB180E6975E0D2CA76E023E6DE9F8CCD8826EBA2F72B8516532F6001DEFFAE76AA5E59E0FA33DBA3999B4E92D1703098CDEDCC416CF008801964084CDE1980132B2B78CB4CE9C15A559528B#)"
       "  (y #3D5DD14AFA2BF24A791E285B90232213D0E3BA74AB1109E768AED19639A322F84BB7D959E2BA92EF73DE4C7F381AA9F4053CFA3CD4527EF9043E304E5B95ED0A3A5A9D590AA641C13DB2B6E32B9B964A6A2C730DD3EA7C8E13F7A140AFF1A91CE375E9B9B960384779DC4EA180FA1F827C52288F366C0770A220F50D6D8FD6F6#)))",
       "\x04\xA3\x4F\xA0\x2B\x03\x94\xD7\x32\xAD\xD5\x9B\x50\xAF\xDB\x5D\x57\x22\xA6\x10"
 
     },
     {
       GCRY_PK_DSA,
       "(private-key"
       " (dsa"
       "  (p #0084E4C626E16005770BD9509ABF7354492E85B8C0060EFAAAEC617F725B592FAA59DF5460575F41022776A9718CE62EDD542AB73C7720869EBDBC834D174ADCD7136827DF51E2613545A25CA573BC502A61B809000B6E35F5EB7FD6F18C35678C23EA1C3638FB9CFDBA2800EE1B62F41A4479DE824F2834666FBF8DC5B53C2617#)"
       "  (q #00B0E6F710051002A9F425D98A677B18E0E5B038AB#)"
       "  (g #44370CEE0FE8609994183DBFEBA7EEA97D466838BCF65EFF506E35616DA93FA4E572A2F08886B74977BC00CA8CD3DBEA7AEB7DB8CBB180E6975E0D2CA76E023E6DE9F8CCD8826EBA2F72B8516532F6001DEFFAE76AA5E59E0FA33DBA3999B4E92D1703098CDEDCC416CF008801964084CDE1980132B2B78CB4CE9C15A559528B#)"
       "  (y #3D5DD14AFA2BF24A791E285B90232213D0E3BA74AB1109E768AED19639A322F84BB7D959E2BA92EF73DE4C7F381AA9F4053CFA3CD4527EF9043E304E5B95ED0A3A5A9D590AA641C13DB2B6E32B9B964A6A2C730DD3EA7C8E13F7A140AFF1A91CE375E9B9B960384779DC4EA180FA1F827C52288F366C0770A220F50D6D8FD6F6#)"
       "  (x #0087F9E91BFBCC1163DE71ED86D557708E32F8ADDE#)))",
       "\x04\xA3\x4F\xA0\x2B\x03\x94\xD7\x32\xAD\xD5\x9B\x50\xAF\xDB\x5D\x57\x22\xA6\x10"
     },
     {
       GCRY_PK_ECDSA,
       "(public-key"
       " (ecdsa(flags param)"
       " (p #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF#)"
       " (a #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC#)"
       " (b #5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B#)"
       " (g #046B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C2964FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5#)"
       " (n #00FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551#)"
       " (h #000000000000000000000000000000000000000000000000000000000000000001#)"
       " (q #04C8A4CEC2E9A9BC8E173531A67B0840DF345C32E261ADD780E6D83D56EFADFD5DE872F8B854819B59543CE0B7F822330464FBC4E6324DADDCD9D059554F63B344#)))",
       "\xE6\xDF\x94\x2D\xBD\x8C\x77\x05\xA3\xDD\x41\x6E\xFC\x04\x01\xDB\x31\x0E\x99\xB6"
     },
     {
       GCRY_PK_ECDSA,
       "(public-key"
       " (ecdsa(flags param)"
       " (p #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF#)"
       " (curve \"NIST P-256\")"
       " (b #5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B#)"
       " (g #046B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C2964FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5#)"
       " (n #00FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551#)"
       " (h #000000000000000000000000000000000000000000000000000000000000000001#)"
       " (q #04C8A4CEC2E9A9BC8E173531A67B0840DF345C32E261ADD780E6D83D56EFADFD5DE872F8B854819B59543CE0B7F822330464FBC4E6324DADDCD9D059554F63B344#)))",
       "\xE6\xDF\x94\x2D\xBD\x8C\x77\x05\xA3\xDD\x41\x6E\xFC\x04\x01\xDB\x31\x0E\x99\xB6"
     },
     {
       GCRY_PK_ECDSA,
       "(public-key"
       " (ecdsa"
       " (p #00FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF#)"
       " (curve \"NIST P-256\")"
       " (b #5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B#)"
       " (g #046B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C2964FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5#)"
       " (n #00FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551#)"
       " (h #000000000000000000000000000000000000000000000000000000000000000001#)"
       " (q #04C8A4CEC2E9A9BC8E173531A67B0840DF345C32E261ADD780E6D83D56EFADFD5DE872F8B854819B59543CE0B7F822330464FBC4E6324DADDCD9D059554F63B344#)))",
       "\xE6\xDF\x94\x2D\xBD\x8C\x77\x05\xA3\xDD\x41\x6E\xFC\x04\x01\xDB\x31\x0E\x99\xB6"
     },
     {
       GCRY_PK_ECDSA,
       "(public-key"
       " (ecdsa"
       " (curve secp256r1)"
       " (q #04C8A4CEC2E9A9BC8E173531A67B0840DF345C32E261ADD780E6D83D56EFADFD5DE872F8B854819B59543CE0B7F822330464FBC4E6324DADDCD9D059554F63B344#)))",
       "\xE6\xDF\x94\x2D\xBD\x8C\x77\x05\xA3\xDD\x41\x6E\xFC\x04\x01\xDB\x31\x0E\x99\xB6"
     },
     {
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve secp256r1)"
       " (q #04C8A4CEC2E9A9BC8E173531A67B0840DF345C32E261ADD780E6D83D56EFADFD5DE872F8B854819B59543CE0B7F822330464FBC4E6324DADDCD9D059554F63B344#)))",
       "\xE6\xDF\x94\x2D\xBD\x8C\x77\x05\xA3\xDD\x41\x6E\xFC\x04\x01\xDB\x31\x0E\x99\xB6"
     },
     {
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve brainpoolP256r1)"
       " (q #042ECD8679930BE2DB4AD42B8600BA3F80"
       /*   */"2D4D539BFF2F69B83EC9B7BBAA7F3406"
       /*   */"436DD11A1756AFE56CD93408410FCDA9"
       /*   */"BA95024EB613BD481A14FCFEC27A448A#)))",
       "\x52\xBA\xD4\xB4\xA3\x2D\x32\xA1\xDD\x06"
       "\x5E\x99\x0B\xF1\xAB\xC1\x13\x3D\x84\xD4",
       1
     },
     { /* Compressed form of above.  */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve brainpoolP256r1)"
       " (q #022ECD8679930BE2DB4AD42B8600BA3F80"
       /*   */"2D4D539BFF2F69B83EC9B7BBAA7F3406#)))",
       "\x52\xBA\xD4\xB4\xA3\x2D\x32\xA1\xDD\x06"
       "\x5E\x99\x0B\xF1\xAB\xC1\x13\x3D\x84\xD4",
       1
     },
     {
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve brainpoolP256r1)"
       " (q #045B784CA008EE64AB3D85017EE0D2BE87"
       /*   */"558762C7300E0C8E06B1F9AF7C031458"
       /*   */"9EBBA41915313417BA54218EB0569C59"
       /*   */"0B156C76DBCAB6E84575E6EF68CE7B87#)))",
       "\x99\x38\x6A\x82\x41\x96\x29\x9C\x89\x74"
       "\xD6\xE1\xBF\x43\xAC\x9B\x9A\x12\xE7\x3F",
       1
     },
     { /* Compressed form of above.  */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve brainpoolP256r1)"
       " (q #035B784CA008EE64AB3D85017EE0D2BE87"
       /*   */"558762C7300E0C8E06B1F9AF7C031458#)))",
       "\x99\x38\x6A\x82\x41\x96\x29\x9C\x89\x74"
       "\xD6\xE1\xBF\x43\xAC\x9B\x9A\x12\xE7\x3F",
       1
     },
     { /* Ed25519 standard */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve Ed25519)"
       " (q #04"
       "     1CC662926E7EFF4982B7FB8B928E61CD74CCDD85277CC57196C3AD20B611085F"
       "     47BD24842905C049257673B3F5249524E0A41FAA17B25B818D0F97E625F1A1D0#)"
       "     ))",
       "\x0C\xCA\xB2\xFD\x48\x9A\x33\x40\x2C\xE8"
       "\xE0\x4A\x1F\xB2\x45\xEA\x80\x3D\x0A\xF1",
       1
     },
     { /* Ed25519+EdDSA */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve Ed25519)(flags eddsa)"
       " (q #773E72848C1FD5F9652B29E2E7AF79571A04990E96F2016BF4E0EC1890C2B7DB#)"
       " ))",
       "\x9D\xB6\xC6\x4A\x38\x83\x0F\x49\x60\x70"
       "\x17\x89\x47\x55\x20\xBE\x8C\x82\x1F\x47",
       1
     },
     { /* Ed25519+EdDSA (with compression prefix) */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve Ed25519)(flags eddsa)"
       " (q #40"
       "     773E72848C1FD5F9652B29E2E7AF79571A04990E96F2016BF4E0EC1890C2B7DB#)"
       " ))",
       "\x9D\xB6\xC6\x4A\x38\x83\x0F\x49\x60\x70"
       "\x17\x89\x47\x55\x20\xBE\x8C\x82\x1F\x47",
       1
     },
     { /* Ed25519+EdDSA  (same but uncompressed)*/
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve Ed25519)(flags eddsa)"
       " (q #04"
       "     629ad237d1ed04dcd4abe1711dd699a1cf51b1584c4de7a4ef8b8a640180b26f"
       "     5bb7c29018ece0f46b01f2960e99041a5779afe7e2292b65f9d51f8c84723e77#)"
       " ))",
       "\x9D\xB6\xC6\x4A\x38\x83\x0F\x49\x60\x70"
       "\x17\x89\x47\x55\x20\xBE\x8C\x82\x1F\x47",
       1
     },
     { /* Cv25519 */
       GCRY_PK_ECC,
       "(public-key"
       " (ecc"
       " (curve Curve25519)(flags djb-tweak)"
       " (q #40"
       "     918C1733127F6BF2646FAE3D081A18AE77111C903B906310B077505EFFF12740#)"
       " ))",
       "\x0F\x89\xA5\x65\xD3\xEA\x18\x7C\xE8\x39"
       "\x33\x23\x98\xF5\xD4\x80\x67\x7D\xF4\x9C",
       1
     },
     { /* Random key  */
       GCRY_PK_RSA,
       "(shadowed-private-key"
       " (rsa"
       " (n #00B493C79928398DA9D99AC0E949FE6EB62F683CB974FFFBFBC01066F5C9A89B"
       "     D3DC48EAD7C65F36EA943C2B2C865C26C4884FF9EDFDA8C99C855B737D77EEF6"
       "     B85DBC0CCEC0E900C1F89A6893A2A93E8B31028469B6927CEB2F08687E547C68"
       "     6B0A2F7E50A194FF7AB7637E03DE0912EF7F6E5F1EC37625BD1620CCC2E7A564"
       "     31E168CDAFBD1D9E61AE47A69A6FA03EF22F844528A710B2392F262B95A3078C"
       "     F321DC8325F92A5691EF69F34FD0DE0B22C79D29DC87723FCADE463829E8E5F7"
       "     D196D73D6C9C180F6A6A0DDBF7B9D8F7FA293C36163B12199EF6A1A95CAE4051"
       "     E3069C522CC6C4A7110F663A5DAD20F66C13A1674D050088208FAE4F33B3AB51"
       "     03#)"
       " (e #00010001#)"
       " (shadowed t1-v1"
       " (#D2760001240102000005000123350000# OPENPGP.1)"
       ")))",
       "\xE5\x6E\xE6\xEE\x5A\x2F\xDC\x3E\x98\x9D"
       "\xFE\x49\xDA\xF5\x67\x43\xE3\x27\x28\x33"
     }
   };
 
 
 /* Run over all entries of KEY_GRIPS, compute the keygrip of each key
    REPETITIONS times and terminate the program on the first mismatch.
    Entries flagged skip_when_fips are skipped in FIPS mode, and entries
    whose algorithm is not available are skipped as well.  */
 static void
 check (void)
 {
   const unsigned int n_entries = sizeof (key_grips) / sizeof (*key_grips);
   unsigned char grip[20];
   gcry_sexp_t keyexp;
   gcry_error_t rc;
   unsigned int idx;
   int round;
 
   for (idx = 0; idx < n_entries; idx++)
     {
       if (in_fips_mode && key_grips[idx].skip_when_fips)
         continue;
 
       if (gcry_pk_test_algo (key_grips[idx].algo))
         {
           if (verbose)
             fprintf (stderr, "algo %d not available; test skipped\n",
                      key_grips[idx].algo);
           continue;
         }
 
       rc = gcry_sexp_sscan (&keyexp, NULL, key_grips[idx].key,
                             strlen (key_grips[idx].key));
       if (rc)
         die ("scanning data %d failed: %s\n", idx, gpg_strerror (rc));
 
       if (debug)
         info ("check(%d): s-exp='%s'\n", idx, key_grips[idx].key);
 
       for (round = 0; round < repetitions; round++)
         {
           if (!gcry_pk_get_keygrip (keyexp, grip))
             die ("gcry_pk_get_keygrip failed for %d\n", idx);
 
           if (!memcmp (key_grips[idx].grip, grip, sizeof (grip)))
             {
               /* Match: show the grip only once per key in debug mode.  */
               if (debug && !round)
                 print_hex ("keygrip: ", grip, sizeof grip);
             }
           else
             {
               print_hex ("keygrip: ", grip, sizeof grip);
               die ("keygrip for %d does not match\n", idx);
             }
         }
 
       gcry_sexp_release (keyexp);
     }
 }
 
 
 
 /* Progress callback registered in main() via gcry_set_progress_handler.
    It writes PRINTCHAR to stdout; all other arguments are ignored.  */
 static void
 progress_handler (void *cb_data, const char *what, int printchar,
 		  int current, int total)
 {
   /* The casts only mark the parameters as intentionally unused.  */
   (void)cb_data;
   (void)what;
   (void)current;
   (void)total;
 
   putchar (printchar);
 }
 
 /* Entry point of the keygrip test.  Recognized options are "--",
    "--verbose", "--debug" and "--repetitions N".  Option parsing stops
    at the first argument which is not one of them.  */
 int
 main (int argc, char **argv)
 {
   int prev_argc;
 
   /* Skip the program name.  */
   if (argc)
     {
       argv++;
       argc--;
     }
 
   for (prev_argc = -1; argc && prev_argc != argc; )
     {
       const char *opt = *argv;
 
       prev_argc = argc;
       if (!strcmp (opt, "--"))
         {
           argv++;
           argc--;
           break;
         }
 
       if (!strcmp (opt, "--verbose"))
         verbose = 1;
       else if (!strcmp (opt, "--debug"))
         verbose = debug = 1;
       else if (!strcmp (opt, "--repetitions"))
         {
           /* Take the count only if a value follows the option.  */
           if (argc > 1)
             {
               repetitions = atoi (argv[1]);
               argv++;
               argc--;
             }
         }
       else
         continue; /* Not an option: ARGC is unchanged, so the loop ends.  */
 
       /* Consume the option itself.  */
       argv++;
       argc--;
     }
 
   if (repetitions < 1)
     repetitions = 1;
 
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   gcry_set_progress_handler (progress_handler, NULL);
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   if (gcry_fips_mode_active ())
     in_fips_mode = 1;
 
   check ();
 
   return 0;
 }
diff --git a/tests/prime.c b/tests/prime.c
index 42264980..a73eb05a 100644
--- a/tests/prime.c
+++ b/tests/prime.c
@@ -1,241 +1,241 @@
 /* prime.c - part of the Libgcrypt test suite.
    Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
-   USA.  */
+   along with this program; if not, see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: GPL-2-or-later
+*/
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 
 #define PGM "prime"
 #include "t-common.h"
 
 /* For each entry of a small table of prime specifications: generate a
    prime, check that gcry_prime_check accepts it, compute a group
    generator from its factors and finally check that the prime plus
    one is rejected by gcry_prime_check.  Failures trigger assert.  */
 static void
 check_primes (void)
 {
   struct prime_spec
   {
     unsigned int prime_bits;
     unsigned int factor_bits;
     unsigned int flags;
   };
   struct prime_spec prime_specs[] =
     {
       { 1024, 100, GCRY_PRIME_FLAG_SPECIAL_FACTOR },
       { 128, 0, 0 },
       { 0 },
     };
   const struct prime_spec *spec;
   gcry_error_t err = GPG_ERR_NO_ERROR;
   gcry_mpi_t *factors = NULL;
   gcry_mpi_t prime = NULL;
   gcry_mpi_t g;
   unsigned int i;
 
   /* The table is terminated by an entry with prime_bits == 0.  */
   for (i = 0, spec = prime_specs; spec->prime_bits; i++, spec++)
     {
       err = gcry_prime_generate (&prime, spec->prime_bits, spec->factor_bits,
                                  &factors, NULL, NULL,
                                  GCRY_WEAK_RANDOM, spec->flags);
       assert (! err);
       if (verbose)
         {
           fprintf (stderr, "test %d: p = ", i);
           gcry_mpi_dump (prime);
           putc ('\n', stderr);
         }
 
       err = gcry_prime_check (prime, 0);
       assert (! err);
 
       err = gcry_prime_group_generator (&g, prime, factors, NULL);
       assert (!err);
       gcry_prime_release_factors (factors);
       factors = NULL;
 
       if (verbose)
         {
           fprintf (stderr, "     %d: g = ", i);
           gcry_mpi_dump (g);
           putc ('\n', stderr);
         }
       gcry_mpi_release (g);
 
       /* PRIME + 1 must not pass the prime check.  */
       gcry_mpi_add_ui (prime, prime, 1);
       err = gcry_prime_check (prime, 0);
       assert (err);
       gcry_mpi_release (prime);
       prime = NULL;
     }
 }
 
 
 /* Print the MPI A as a hex value inside a list named NAME to stdout.
    The program is terminated if the conversion or the output fails.  */
 static void
 print_mpi (const char *name, gcry_mpi_t a)
 {
   unsigned char *hexbuf;
   gcry_error_t rc;
 
   rc = gcry_mpi_aprint (GCRYMPI_FMT_HEX, &hexbuf, NULL, a);
   if (rc)
     die ("gcry_mpi_aprint failed: %s\n", gcry_strerror (rc));
 
   printf ("  (%s #%s#)\n", name, hexbuf);
   /* Flush only if the stream is not already in an error state.  */
   if (ferror (stdout) || fflush (stdout) == EOF)
     die ("writing output failed\n");
   gcry_free (hexbuf);
 }
 
 
 /* Create the key for our public standard dummy CA.  The two primes
    are the first primes below and above a fixed start value; progress
    is dumped to stderr and the resulting private key is printed as an
    S-expression to stdout.  */
 static void
 create_42prime (void)
 {
   gcry_error_t err;
   char string[128*2+1];
   int i;
   gcry_mpi_t start = NULL;
   gcry_mpi_t p, q, n, t1, t2, phi, f, g, e, d, u;
 
 
   /* Our start value is a string of 0x42 values, with the exception
      that the two high order bits are set.  This is to resemble the
      way Libgcrypt generates RSA primes.  */
   for (i=0; i < 128;)
     {
       string[i++] = '4';
       string[i++] = '2';
     }
   string[i] = 0;
   string[0] = 'C';
 
   err = gcry_mpi_scan (&start, GCRYMPI_FMT_HEX, string, 0, NULL);
   if (err)
     die ("gcry_mpi_scan failed: %s\n", gcry_strerror (err));
   fputs ("start:", stderr); gcry_mpi_dump (start); putc ('\n', stderr);
 
   /* Generate two primes with p < q.  We take the first primes below
      and above a start value. */
   p = gcry_mpi_copy (start);
   gcry_mpi_sub_ui (p, p, 1);
   while (gcry_prime_check (p, 0))
     gcry_mpi_sub_ui (p, p, 2);
   fputs ("    p:", stderr); gcry_mpi_dump (p); putc ('\n', stderr);
   q = gcry_mpi_copy (start);
   gcry_mpi_add_ui (q, q, 1);
   while (gcry_prime_check (q, 0))
     gcry_mpi_add_ui (q, q, 2);
   fputs ("    q:", stderr); gcry_mpi_dump (q); putc ('\n', stderr);
 
   /* Compute the modulus.  */
   n = gcry_mpi_new (1024);
   gcry_mpi_mul (n, p, q);
   fputs ("    n:", stderr); gcry_mpi_dump (n); putc ('\n', stderr);
   if (gcry_mpi_get_nbits (n) != 1024)
     die ("Oops: the size of N is not 1024 but %u\n", gcry_mpi_get_nbits (n));
 
   /* Calculate Euler totient: phi = (p-1)(q-1).  Also compute
      g = gcd(p-1, q-1) and f = phi / g.  */
   t1 = gcry_mpi_new (0);
   t2 = gcry_mpi_new (0);
   phi = gcry_mpi_new (0);
   g   = gcry_mpi_new (0);
   f   = gcry_mpi_new (0);
   gcry_mpi_sub_ui (t1, p, 1);
   gcry_mpi_sub_ui (t2, q, 1);
   gcry_mpi_mul (phi, t1, t2);
   gcry_mpi_gcd (g, t1, t2);
   gcry_mpi_div (f, NULL, phi, g, -1);
 
   /* Check the public exponent.  */
   e = gcry_mpi_set_ui (NULL, 65537);
   if (!gcry_mpi_gcd (t1, e, phi))
     die ("Oops: E is not a generator\n");
   fputs ("    e:", stderr); gcry_mpi_dump (e); putc ('\n', stderr);
 
   /* Compute the secret key:  d = e^-1 mod f */
   d = gcry_mpi_new (0);
   gcry_mpi_invm (d, e, f );
   fputs ("    d:", stderr); gcry_mpi_dump (d); putc ('\n', stderr);
 
   /* Compute the inverse of p modulo q. */
   u = gcry_mpi_new (0);
   gcry_mpi_invm (u, p, q);
   fputs ("    u:", stderr); gcry_mpi_dump (u); putc ('\n', stderr);
 
   /* Print the S-expression.  */
   fputs ("(private-key\n (rsa\n", stdout);
   print_mpi ("n", n);
   print_mpi ("e", e);
   print_mpi ("d", d);
   print_mpi ("p", p);
   print_mpi ("q", q);
   print_mpi ("u", u);
   fputs ("))\n", stdout);
 
   /* START was only needed as the source of the copies P and Q; it
      must be released too.  */
   gcry_mpi_release (start);
   gcry_mpi_release (p);
   gcry_mpi_release (q);
   gcry_mpi_release (n);
   gcry_mpi_release (t1);
   gcry_mpi_release (t2);
   gcry_mpi_release (phi);
   gcry_mpi_release (f);
   gcry_mpi_release (g);
   gcry_mpi_release (e);
   gcry_mpi_release (d);
   gcry_mpi_release (u);
 }
 
 
 
 
 /* Entry point of the prime test.  The first argument may be
    "--verbose", "--debug" or "--42"; the latter runs create_42prime
    instead of the regular prime checks.  */
 int
 main (int argc, char **argv)
 {
   int mode42 = 0;
 
   if (argc > 1)
     {
       const char *opt = argv[1];
 
       if (!strcmp (opt, "--verbose"))
         verbose = 1;
       else if (!strcmp (opt, "--debug"))
         verbose = debug = 1;
       else if (!strcmp (opt, "--42"))
         verbose = debug = mode42 = 1;
     }
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   if (! gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   if (!mode42)
     check_primes ();
   else
     create_42prime ();
 
   return 0;
 }
diff --git a/tests/random.c b/tests/random.c
index 2ffd528b..e56223ba 100644
--- a/tests/random.c
+++ b/tests/random.c
@@ -1,826 +1,826 @@
 /* random.c - part of the Libgcrypt test suite.
    Copyright (C) 2005 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
 
    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
-   USA.  */
+   along with this program; if not, see <https://www.gnu.org/licenses/>.
+   SPDX-License-Identifier: GPL-2-or-later
+*/
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
 #ifndef HAVE_W32_SYSTEM
 # include <signal.h>
 # include <sys/wait.h>
 #endif
 
 #include "stopwatch.h"
 
 
 #define PGM "random"
 #define NEED_EXTRA_TEST_SUPPORT 1
 #include "t-common.h"
 
 static int with_progress;
 
 
 /* Return a newly allocated string "<srcdir>/FNAME" where <srcdir> is
  * the value of the environment variable "srcdir" or "." if that
  * variable is not set.  The lookup is done only once.  */
 static char *
 prepend_srcdir (const char *fname)
 {
   static const char *srcdir;
   size_t dirlen, namelen;
   char *path;
 
   if (!srcdir)
     {
       srcdir = getenv ("srcdir");
       if (!srcdir)
         srcdir = ".";
     }
 
   dirlen = strlen (srcdir);
   namelen = strlen (fname);
 
   /* Room for the directory, the slash, the name and the Nul.  */
   path = xmalloc (dirlen + 1 + namelen + 1);
   memcpy (path, srcdir, dirlen);
   path[dirlen] = '/';
   memcpy (path + dirlen + 1, fname, namelen + 1);
   return path;
 }
 
 
 /* Emit TEXT via info() and then the N bytes at BUF as uppercase hex
  * digits followed by a newline to stderr.  */
 static void
 print_hex (const char *text, const void *buf, size_t n)
 {
   const unsigned char *bytes = buf;
   size_t idx;
 
   info ("%s", text);
   for (idx = 0; idx < n; idx++)
     fprintf (stderr, "%02X", bytes[idx]);
   putc ('\n', stderr);
 }
 
 
 /* Progress callback: report WHAT, PRINTCHAR, CURRENT and TOTAL via
  * info() and flush stderr.  CB_DATA is not used.  */
 static void
 progress_cb (void *cb_data, const char *what, int printchar,
              int current, int total)
 {
   (void)cb_data;
 
   info ("progress (%s %c %d %d)\n", what, printchar, current, total);
   fflush (stderr);
 }
 
 
 #ifndef HAVE_W32_SYSTEM
 /* Write all NBYTES bytes from BUF to the descriptor FD, continuing
  * after short writes and retrying writes interrupted by a signal.
  * Returns 0 on success and -1 on any other write error.  */
 static int
 writen (int fd, const void *buf, size_t nbytes)
 {
   const char *cursor = buf;
   size_t remaining;
   int rc;
 
   for (remaining = nbytes; remaining; remaining -= rc, cursor += rc)
     {
       rc = write (fd, cursor, remaining);
       if (rc >= 0)
         continue;
       if (errno != EINTR)
         return -1;
       rc = 0; /* Interrupted: nothing was written, try again.  */
     }
 
   return 0;
 }
 #endif /*!HAVE_W32_SYSTEM*/
 
 
 #ifndef HAVE_W32_SYSTEM
 /* Read up to BUFLEN bytes from the descriptor FD into BUF, continuing
  * after short reads until the buffer is full or EOF is seen.  A read
  * interrupted by a signal is retried.  If RET_NREAD is not NULL the
  * number of bytes actually read is stored there.  Returns 0 on
  * success (including EOF) and -1 on a read error, in which case
  * RET_NREAD is not updated.  */
 static int
 readn (int fd, void *buf, size_t buflen, size_t *ret_nread)
 {
   size_t nleft = buflen;
   int nread;
 
   while ( nleft > 0 )
     {
       nread = read ( fd, buf, nleft );
       if (nread < 0)
         {
           /* The error code is in ERRNO; NREAD is just -1 here.  */
           if (errno == EINTR)
             nread = 0;
           else
             return -1;
         }
       else if (!nread)
         break; /* EOF */
       nleft -= nread;
       buf = (char*)buf + nread;
     }
   if (ret_nread)
     *ret_nread = buflen - nleft;
   return 0;
 }
 #endif /*!HAVE_W32_SYSTEM*/
 
 
 /* Check that forking won't return the same random.  After a fork the
  * parent and the child each request 16 bytes of strong random; the
  * child sends its bytes through a pipe to the parent, which
  * terminates the program if both buffers are identical or if the
  * child did not exit with status 0.  */
 static void
 check_forking (void)
 {
 #ifdef HAVE_W32_SYSTEM
   if (verbose)
     info ("check_forking skipped: not applicable on Windows\n");
 #else /*!HAVE_W32_SYSTEM*/
   pid_t pid;
   int rp[2];
   int i, status;
   size_t nread;
   char tmp1[16], tmp1c[16], tmp1p[16];
 
   if (verbose)
     info ("checking that a fork won't cause the same random output\n");
 
   /* We better make sure that the RNG has been initialized. */
   gcry_randomize (tmp1, sizeof tmp1, GCRY_STRONG_RANDOM);
   if (verbose)
     print_hex ("initial random: ", tmp1, sizeof tmp1);
 
   if (pipe (rp) == -1)
     die ("pipe failed: %s\n", strerror (errno));
 
   pid = fork ();
   if (pid == (pid_t)(-1))
     die ("fork failed: %s\n", strerror (errno));
   if (!pid)
     {
       /* Child: send our random bytes to the parent and exit.  */
       gcry_randomize (tmp1c, sizeof tmp1c, GCRY_STRONG_RANDOM);
       if (writen (rp[1], tmp1c, sizeof tmp1c))
         die ("write failed: %s\n", strerror (errno));
       if (verbose)
         {
           print_hex ("  child random: ", tmp1c, sizeof tmp1c);
           fflush (stdout);
         }
       _exit (0);
     }
   gcry_randomize (tmp1p, sizeof tmp1p, GCRY_STRONG_RANDOM);
   if (verbose)
     print_hex (" parent random: ", tmp1p, sizeof tmp1p);
 
   close (rp[1]);
   if (readn (rp[0], tmp1c, sizeof tmp1c, &nread))
     die ("read failed: %s\n", strerror (errno));
   if (nread != sizeof tmp1c)
     die ("read too short\n");
   /* The read end is not needed anymore; do not leak the descriptor.  */
   close (rp[0]);
 
   while ( (i=waitpid (pid, &status, 0)) == -1 && errno == EINTR)
     ;
   if (i != (pid_t)(-1)
       && WIFEXITED (status) && !WEXITSTATUS (status))
     ;
   else
     die ("child failed\n");
 
   if (!memcmp (tmp1p, tmp1c, sizeof tmp1c))
     die ("parent and child got the same random number\n");
 #endif  /*!HAVE_W32_SYSTEM*/
 }
 
 
 
 /* Check that forking won't return the same nonce.  After a fork the
  * parent and the child each create a 10 byte nonce; the child sends
  * its nonce through a pipe to the parent, which terminates the
  * program if both nonces are identical or if the child did not exit
  * with status 0.  */
 static void
 check_nonce_forking (void)
 {
 #ifdef HAVE_W32_SYSTEM
   if (verbose)
     info ("check_nonce_forking skipped: not applicable on Windows\n");
 #else /*!HAVE_W32_SYSTEM*/
   pid_t pid;
   int rp[2];
   int i, status;
   size_t nread;
   char nonce1[10], nonce1c[10], nonce1p[10];
 
   if (verbose)
     info ("checking that a fork won't cause the same nonce output\n");
 
   /* We won't get the same nonce back if we never initialized the
      nonce subsystem, thus we get one nonce here and forget about
      it. */
   gcry_create_nonce (nonce1, sizeof nonce1);
   if (verbose)
     print_hex ("initial nonce: ", nonce1, sizeof nonce1);
 
   if (pipe (rp) == -1)
     die ("pipe failed: %s\n", strerror (errno));
 
   pid = fork ();
   if (pid == (pid_t)(-1))
     die ("fork failed: %s\n", strerror (errno));
   if (!pid)
     {
       /* Child: send our nonce to the parent and exit.  */
       gcry_create_nonce (nonce1c, sizeof nonce1c);
       if (writen (rp[1], nonce1c, sizeof nonce1c))
         die ("write failed: %s\n", strerror (errno));
       if (verbose)
         {
           print_hex ("  child nonce: ", nonce1c, sizeof nonce1c);
           fflush (stdout);
         }
       _exit (0);
     }
   gcry_create_nonce (nonce1p, sizeof nonce1p);
   if (verbose)
     print_hex (" parent nonce: ", nonce1p, sizeof nonce1p);
 
   close (rp[1]);
   if (readn (rp[0], nonce1c, sizeof nonce1c, &nread))
     die ("read failed: %s\n", strerror (errno));
   if (nread != sizeof nonce1c)
     die ("read too short\n");
   /* The read end is not needed anymore; do not leak the descriptor.  */
   close (rp[0]);
 
   while ( (i=waitpid (pid, &status, 0)) == -1 && errno == EINTR)
     ;
   if (i != (pid_t)(-1)
       && WIFEXITED (status) && !WEXITSTATUS (status))
     ;
   else
     die ("child failed\n");
 
   if (!memcmp (nonce1p, nonce1c, sizeof nonce1c))
     die ("parent and child got the same nonce\n");
 #endif  /*!HAVE_W32_SYSTEM*/
 }
 
 
 /* Check that a closed random device is re-opened if needed.  The
  * parent requests random bytes and forks; the child issues
  * GCRYCTL_CLOSE_RANDOM_DEVICE, requests random bytes again and exits
  * with status 0.  The parent terminates the program unless the child
  * exited normally with status 0.  */
 static void
 check_close_random_device (void)
 {
 #ifdef HAVE_W32_SYSTEM
   if (verbose)
     info ("check_close_random_device skipped: not applicable on Windows\n");
 #else /*!HAVE_W32_SYSTEM*/
   pid_t pid;
   int i, status;
   char buf[4];
 
   if (verbose)
     info ("checking that close_random_device works\n");
 
   gcry_randomize (buf, sizeof buf, GCRY_STRONG_RANDOM);
   if (verbose)
     print_hex ("parent random: ", buf, sizeof buf);
 
   pid = fork ();
   if (pid == (pid_t)(-1))
     die ("fork failed: %s\n", strerror (errno));
   if (!pid)
     {
       /* Child process.  */
       xgcry_control ((GCRYCTL_CLOSE_RANDOM_DEVICE, 0));
 
       /* The next call will re-open the device.  */
       gcry_randomize (buf, sizeof buf, GCRY_STRONG_RANDOM);
       if (verbose)
         {
           print_hex ("child random : ", buf, sizeof buf);
           fflush (stdout);
         }
       _exit (0);
     }
 
   /* Wait for the child; restart waitpid if interrupted by a signal.  */
   while ( (i=waitpid (pid, &status, 0)) == -1 && errno == EINTR)
     ;
   if (i != (pid_t)(-1)
       && WIFEXITED (status) && !WEXITSTATUS (status))
     ;
   else
     die ("child failed\n");
 
 #endif  /*!HAVE_W32_SYSTEM*/
 }
 
 
 static int
 rng_type (void)
 {
   int rngtype;
   if (gcry_control (GCRYCTL_GET_CURRENT_RNG_TYPE, &rngtype))
     die ("retrieving RNG type failed\n");
   return rngtype;
 }
 
 
 /* Check RNG type switching after random bytes have been requested.
  * Requests for the System and the FIPS type are expected to leave the
  * type reported by rng_type() unchanged, whereas a request for the
  * standard type must yield GCRY_RNG_TYPE_STANDARD.  After each
  * request some random bytes are drawn and the reported type is
  * checked again to make sure it did not change.  */
 static void
 check_rng_type_switching (void)
 {
   int rngtype, initial;
   char tmp1[4];
 
   if (verbose)
     info ("checking whether RNG type switching works\n");
 
   /* Record the type in use before any switching is requested.  */
   rngtype = rng_type ();
   if (debug)
     info ("rng type: %d\n", rngtype);
   initial = rngtype;
   gcry_randomize (tmp1, sizeof tmp1, GCRY_STRONG_RANDOM);
   if (debug)
     print_hex ("  sample: ", tmp1, sizeof tmp1);
   if (rngtype != rng_type ())
     die ("RNG type unexpectedly changed\n");
 
   /* Request the System RNG; the reported type must stay INITIAL.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_SYSTEM));
 
   rngtype = rng_type ();
   if (debug)
     info ("rng type: %d\n", rngtype);
   if (rngtype != initial)
     die ("switching to System RNG unexpectedly succeeded\n");
   gcry_randomize (tmp1, sizeof tmp1, GCRY_STRONG_RANDOM);
   if (debug)
     print_hex ("  sample: ", tmp1, sizeof tmp1);
   if (rngtype != rng_type ())
     die ("RNG type unexpectedly changed\n");
 
   /* Request the FIPS RNG; the reported type must stay INITIAL.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_FIPS));
 
   rngtype = rng_type ();
   if (debug)
     info ("rng type: %d\n", rngtype);
   if (rngtype != initial)
     die ("switching to FIPS RNG unexpectedly succeeded\n");
   gcry_randomize (tmp1, sizeof tmp1, GCRY_STRONG_RANDOM);
   if (debug)
     print_hex ("  sample: ", tmp1, sizeof tmp1);
   if (rngtype != rng_type ())
     die ("RNG type unexpectedly changed\n");
 
   /* Request the standard RNG; this request must take effect.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_STANDARD));
 
   rngtype = rng_type ();
   if (debug)
     info ("rng type: %d\n", rngtype);
   if (rngtype != GCRY_RNG_TYPE_STANDARD)
     die ("switching to standard RNG failed\n");
   gcry_randomize (tmp1, sizeof tmp1, GCRY_STRONG_RANDOM);
   if (debug)
     print_hex ("  sample: ", tmp1, sizeof tmp1);
   if (rngtype != rng_type ())
     die ("RNG type unexpectedly changed\n");
 }
 
 
 /* Check switching of the preferred RNG type in the early stage (main
    calls this before gcry_check_version).  A request for the System
    or FIPS RNG is only required to succeed when the type found at
    entry compares greater than or equal to the requested one; a
    request for the standard RNG must always succeed.  */
 static void
 check_early_rng_type_switching (void)
 {
   int at_entry;
   int now;
 
   if (verbose)
     info ("checking whether RNG type switching works in the early stage\n");
 
   at_entry = rng_type ();
   if (debug)
     info ("rng type: %d\n", at_entry);
 
   /* Step 1: System RNG.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_SYSTEM));
 
   now = rng_type ();
   if (debug)
     info ("rng type: %d\n", now);
   if (at_entry >= GCRY_RNG_TYPE_SYSTEM)
     {
       if (now != GCRY_RNG_TYPE_SYSTEM)
         die ("switching to System RNG failed\n");
     }
 
   /* Step 2: FIPS RNG.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_FIPS));
 
   now = rng_type ();
   if (debug)
     info ("rng type: %d\n", now);
   if (at_entry >= GCRY_RNG_TYPE_FIPS)
     {
       if (now != GCRY_RNG_TYPE_FIPS)
         die ("switching to FIPS RNG failed\n");
     }
 
   /* Step 3: back to the standard RNG.  */
   xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_STANDARD));
 
   now = rng_type ();
   if (debug)
     info ("rng type: %d\n", now);
   if (now != GCRY_RNG_TYPE_STANDARD)
     die ("switching to standard RNG failed\n");
 }
 
 
 /* Exercise gcry_control (GCRYCTL_DRBG_REINIT, flags, pers, npers, guard).
 
    First the argument validation is checked: a non-NULL last argument
    and a negative NPERS must both yield GPG_ERR_INV_ARG.  If the
    active RNG is not the FIPS one, the call is expected to return
    GPG_ERR_NOT_SUPPORTED and the function returns at that point.
    Otherwise further invalid PERS/NPERS combinations and an unknown
    flag are tried, and finally every flag string of the table TV is
    used for a re-initialization, once without and once with a
    personalization string.  Any deviation is reported via die().  */
 static void
 check_drbg_reinit (void)
 {
   static struct { const char *flags; } tv[] = {
     { NULL },
     { "" },
     { "sha1" },
     { "sha1 pr" },
     { "sha256" },
     { "sha256 pr" },
     { "sha512" },
     { "sha512 pr" },
     { "hmac sha1" },
     { "hmac sha1 pr" },
     { "hmac sha256" },
     { "hmac sha256 pr" },
     { "hmac sha512" },
     { "hmac sha512 pr" },
     { "aes sym128" },
     { "aes sym128 pr" },
     { "aes sym192" },
     { "aes sym192 pr" },
     { "aes sym256" },
     { "aes sym256 pr" }
   };
   int tidx;
   gpg_error_t err;
   char pers_string[] = "I'm a doctor, not an engineer.";
   gcry_buffer_t pers[1];
 
   if (verbose)
     info ("checking DRBG_REINIT\n");
 
   memset (pers, 0, sizeof pers);
   pers[0].data = pers_string;
   pers[0].len = strlen (pers_string);
 
   /* The last argument acts as a guard: anything but NULL is an error.  */
   err = gcry_control (GCRYCTL_DRBG_REINIT, "", NULL, 0, &err);
   if (gpg_err_code (err) != GPG_ERR_INV_ARG)
     die ("gcry_control(DRBG_REINIT) guard value did not work\n");
 
   err = gcry_control (GCRYCTL_DRBG_REINIT, "", NULL, -1, NULL);
   if (gpg_err_code (err) != GPG_ERR_INV_ARG)
     die ("gcry_control(DRBG_REINIT) npers negative detection failed\n");
 
   /* The remaining checks only apply when the FIPS RNG is active.  */
   if (rng_type () != GCRY_RNG_TYPE_FIPS)
     {
       err = gcry_control (GCRYCTL_DRBG_REINIT, "", NULL, 0, NULL);
       if (gpg_err_code (err) != GPG_ERR_NOT_SUPPORTED)
         die ("DRBG_REINIT worked despite that DRBG is not active\n");
       return;
     }
 
   err = gcry_control (GCRYCTL_DRBG_REINIT, "", NULL, 1, NULL);
   if (gpg_err_code (err) != GPG_ERR_INV_ARG)
     die ("_gcry_rngdrbg_reinit failed to detect: (!pers && npers)\n");
   err = gcry_control (GCRYCTL_DRBG_REINIT, "", pers, 2, NULL);
   if (gpg_err_code (err) != GPG_ERR_INV_ARG)
     die ("_gcry_rngdrbg_reinit failed to detect: (pers && npers != 1)\n");
 
   err = gcry_control (GCRYCTL_DRBG_REINIT, "aes sym128 bad pr ", pers, 1, NULL);
   if (gpg_err_code (err) != GPG_ERR_INV_FLAG)
     die ("_gcry_rngdrbg_reinit failed to detect a bad flag\n");
 
   for (tidx=0; tidx < DIM(tv); tidx++)
     {
       /* The first table entry has NULL flags; passing NULL to a "%s"
          conversion is undefined, so use a printable stand-in for the
          diagnostics.  The control call itself still gets the real
          (possibly NULL) value.  */
       const char *shown = tv[tidx].flags ? tv[tidx].flags : "(null)";
 
       err = gcry_control (GCRYCTL_DRBG_REINIT, tv[tidx].flags, NULL, 0, NULL);
       if (err)
         die ("_gcry_rngdrbg_reinit failed for \"%s\" w/o pers: %s\n",
              shown, gpg_strerror (err));
       err = gcry_control (GCRYCTL_DRBG_REINIT, tv[tidx].flags, pers, 1, NULL);
       if (err)
         die ("_gcry_rngdrbg_reinit failed for \"%s\" with pers: %s\n",
              shown, gpg_strerror (err));
       /* fixme: We should extract some random after each test.  */
     }
 }
 
 
 #if defined(USE_POSIX_SPAWN_FOR_TESTS) && defined (HAVE_SPAWN_H)
 #include <spawn.h>
 extern char **environ;
 
 static void
 run_all_rng_tests (const char *program)
 {
   static const char *options[][2] = {
     { "--early-rng-check",     NULL },
     { "--early-rng-check",     "--prefer-standard-rng" },
     { "--early-rng-check",     "--prefer-fips-rng" },
     { "--early-rng-check",     "--prefer-system-rng" },
     { "--prefer-standard-rng", NULL },
     { "--prefer-fips-rng",     NULL },
     { "--prefer-system-rng",   NULL },
     { NULL, NULL }
   };
   int idx;
   char *argv[8];
 
   for (idx=0; options[idx][0]; idx++)
     {
       int i;
       pid_t pid;
       int status;
 
       if (verbose)
         info ("now running with options '%s%s%s'\n",
               options[idx][0],
               options[idx][1] ? " " : "",
               options[idx][1] ? options[idx][1] : "");
 
       i = 0;
       argv[i++] = xstrdup (program);
       argv[i++] = xstrdup ("--in-recursion");
       argv[i++] = xstrdup ("--verbose");
       argv[i++] = xstrdup ("--debug");
       argv[i++] = xstrdup ("--progress");
       argv[i++] = xstrdup (options[idx][0]);
       if (options[idx][1])
         argv[i++] = xstrdup (options[idx][1]);
       argv[i++] = NULL;
 
       if (posix_spawn (&pid, program, NULL, NULL, argv, environ))
         die ("spawning '%s' failed\n", program);
 
       if (waitpid (pid, &status, 0) < 0)
         die ("waitpid for '%s' failed\n", program);
 
       if (WIFEXITED (status) && WEXITSTATUS (status))
         die ("running '%s' failed with %d\n", program, WEXITSTATUS (status));
       else if (!WIFEXITED (status))
         die ("running '%s' failed\n", program);
 
       while (i)
         xfree (argv[--i]);
     }
 }
 #else
 /* The initialization behaviour can only be checked in a fresh
    process, thus this program is started again for each of several
    sets of command line arguments.  system() is used for that so
    that these tests also work on Windows.  PROGRAM is put first on
    the command line, followed by "--in-recursion", the diagnostic
    flags active in this process and the option set under test.  A
    non-zero result from system() is reported via die().  */
 static void
 run_all_rng_tests (const char *program)
 {
   static const char *options[] = {
     "--early-rng-check",
     "--early-rng-check --prefer-standard-rng",
     "--early-rng-check --prefer-fips-rng",
     "--early-rng-check --prefer-system-rng",
     "--prefer-standard-rng",
     "--prefer-fips-rng",
     "--prefer-system-rng",
     NULL
   };
   const char **opt;
   size_t longest = 0;
   size_t bufsize;
   char *cmdline;
 
   /* Find the longest option set.  */
   for (opt = options; *opt; opt++)
     {
       size_t n = strlen (*opt);
 
       if (n > longest)
         longest = n;
     }
 
   /* Room for the program name, all fixed arguments, the blank in
      front of the option set, the option set itself, one spare byte
      and the terminating Nul.  */
   bufsize = (strlen (program)
              + strlen (" --in-recursion --verbose --debug --progress")
              + longest + 1 + 1);
   cmdline = malloc (bufsize);
   if (!cmdline)
     die ("out of core\n");
 
   for (opt = options; *opt; opt++)
     {
       if (verbose)
         info ("now running with options '%s'\n", *opt);
       snprintf (cmdline, bufsize, "%s --in-recursion%s%s%s %s",
                 program,
                 verbose ? " --verbose" : "",
                 debug ? " --debug" : "",
                 with_progress ? " --progress" : "",
                 *opt);
       if (system (cmdline))
         die ("running '%s' failed\n", cmdline);
     }
 
   free (cmdline);
 }
 #endif
 
 
 /* Time requests for 32 bytes (256 bits) of GCRY_STRONG_RANDOM: first
    a single request on its own, then five rounds of 100 requests
    each.  The figures returned by elapsed_time() are printed using
    info().  */
 static void
 run_benchmark (void)
 {
   char rndbuf[32];
   int round;
   int call;
 
   if (verbose)
     info ("benchmarking GCRY_STRONG_RANDOM (/dev/urandom)\n");
 
   /* The very first request is measured separately.  */
   start_timer ();
   gcry_randomize (rndbuf, sizeof rndbuf, GCRY_STRONG_RANDOM);
   stop_timer ();
 
   info ("getting first 256 bits: %s", elapsed_time (1));
 
   for (round = 5; round > 0; round--)
     {
       start_timer ();
       for (call = 100; call > 0; call--)
         gcry_randomize (rndbuf, sizeof rndbuf, GCRY_STRONG_RANDOM);
       stop_timer ();
 
       info ("100 calls of 256 bits each: %s", elapsed_time (100));
     }
 }
 
 
 /* Entry point of the RNG test program.  Parses the command line,
    initializes Libgcrypt and then runs either the benchmark or the
    checks.  Unless "--in-recursion" was given, the program finally
    re-runs itself with several option sets via run_all_rng_tests.
    Returns 0; failures are reported through die().  */
 int
 main (int argc, char **argv)
 {
   int last_argc = -1;
   int early_rng = 0;
   int in_recursion = 0;
   int benchmark = 0;
   int with_seed_file = 0;
   const char *program = NULL;
 
   /* Remember argv[0]; it is needed to re-run this program later.  */
   if (argc)
     {
       program = *argv;
       argc--; argv++;
     }
   else
     die ("argv[0] missing\n");
 
   /* Option parsing.  The loop stops when no arguments are left or
      when an iteration did not consume anything, that is at the
      first argument which is not a known option.  */
   while (argc && last_argc != argc )
     {
       last_argc = argc;
       if (!strcmp (*argv, "--"))
         {
           argc--; argv++;
           break;
         }
       else if (!strcmp (*argv, "--help"))
         {
           fputs ("usage: random [options]\n", stdout);
           exit (0);
         }
       else if (!strcmp (*argv, "--verbose"))
         {
           verbose = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--debug"))
         {
           /* Debug output implies verbose output.  */
           debug = verbose = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--progress"))
         {
           argc--; argv++;
           with_progress = 1;
         }
       else if (!strcmp (*argv, "--in-recursion"))
         {
           /* Passed by run_all_rng_tests to the re-run program.  */
           in_recursion = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--benchmark"))
         {
           benchmark = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--early-rng-check"))
         {
           early_rng = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--with-seed-file"))
         {
           with_seed_file = 1;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--prefer-standard-rng"))
         {
           /* This is anyway the default, but we may want to use it for
              debugging. */
           xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE,
                           GCRY_RNG_TYPE_STANDARD));
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--prefer-fips-rng"))
         {
           xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_FIPS));
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--prefer-system-rng"))
         {
           xgcry_control ((GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_SYSTEM));
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--disable-hwf"))
         {
           /* The following argument, if any, names the hardware
              feature to disable.  */
           argc--;
           argv++;
           if (argc)
             {
               if (gcry_control (GCRYCTL_DISABLE_HWF, *argv, NULL))
                 die ("unknown hardware feature `%s'\n", *argv);
               argc--;
               argv++;
             }
         }
     }
 
 #ifndef HAVE_W32_SYSTEM
   signal (SIGPIPE, SIG_IGN);
 #endif
 
   /* The benchmark reports its results via info(), so force verbose.  */
   if (benchmark && !verbose)
     verbose = 1;
 
   /* The early check is done before gcry_check_version is called.  */
   if (early_rng)
     {
       /* Don't switch RNG in fips mode. */
       if (!gcry_fips_mode_active())
         check_early_rng_type_switching ();
     }
 
   xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   if (with_progress)
     gcry_set_progress_handler (progress_cb, NULL);
 
   if (with_seed_file)
     {
       char *fname = prepend_srcdir ("random.seed");
 
       /* A missing seed file is only noted; the name is registered
          with the library in either case.  */
       if (access (fname, F_OK))
         info ("random seed file '%s' not found\n", fname);
       gcry_control (GCRYCTL_SET_RANDOM_SEED_FILE, fname);
       xfree (fname);
     }
 
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   /* The forking and device checks are only run by the top-level
      invocation, not by the re-run program.  */
   if (benchmark)
     {
       run_benchmark ();
     }
   else if (!in_recursion)
     {
       check_forking ();
       check_nonce_forking ();
       check_close_random_device ();
     }
   /* For now we do not run the drbg_reinit check from "make check" due
      to its high requirement for entropy.  */
   if (!benchmark && !getenv ("GCRYPT_IN_REGRESSION_TEST"))
     check_drbg_reinit ();
 
   /* Don't switch RNG in fips mode.  */
   if (!benchmark && !gcry_fips_mode_active())
     check_rng_type_switching ();
 
   /* Re-run this program with the different RNG option sets.  */
   if (!in_recursion && !benchmark)
     run_all_rng_tests (program);
 
   /* Print this info last so that it does not influence the
    * initialization and thus the benchmarking.  */
   if (!in_recursion && verbose)
     {
       char *buf;
       char *fields[5];
 
       /* The JENT RNG counts as active if the fifth colon separated
          field of the "rng-type" config line is a positive number.  */
       buf = gcry_get_config (0, "rng-type");
       if (buf
           && split_fields_colon (buf, fields, DIM (fields)) >= 5
           && atoi (fields[4]) > 0)
         info ("The JENT RNG was active\n");
       gcry_free (buf);
     }
 
   if (debug)
     xgcry_control ((GCRYCTL_DUMP_RANDOM_STATS));
 
   return 0;
 }
diff --git a/tests/t-kdf.c b/tests/t-kdf.c
index 716fb53e..10f64a7c 100644
--- a/tests/t-kdf.c
+++ b/tests/t-kdf.c
@@ -1,2015 +1,2015 @@
 /* t-kdf.c -  KDF regression tests
  * Copyright (C) 2011 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>
 
 #include "stopwatch.h"
 #define PGM "t-kdf"
 #include "t-common.h"
 
 static int in_fips_mode;
 
 
 /* Sink for a computed buffer: takes BUFFER (declared volatile) and
    its length BUFLEN and does nothing with either of them.  */
 static void
 dummy_consumer (volatile char *buffer, size_t buflen)
 {
   /* Both parameters are intentionally unused.  */
   (void)buflen;
   (void)buffer;
 }
 
 
 /* Benchmark gcry_kdf_derive in GCRY_KDF_ITERSALTED_S2K mode with
    SHA-1 and S2KCOUNT iterations, and print the string returned by
    elapsed_time() on stdout.
 
    A first pass uses 10 repetitions.  If the leading number of its
    result is 0 the measurement is redone once with 10000
    repetitions, if it is below 10 it is redone once with 100
    repetitions; otherwise the first result is printed.  A failing
    derivation is reported via die().  */
 static void
 bench_s2k (unsigned long s2kcount)
 {
   const char passphrase[] = "123456789abcdef0";
   char keybuf[128/8];
   unsigned int repetitions = 10;
   unsigned int done;
   const char *elapsed;
   int second_pass;
   gpg_error_t err;
 
   for (second_pass = 0; ; second_pass = 1)
     {
       int reading;
 
       start_timer ();
       done = 0;
       while (done < repetitions)
         {
           err = gcry_kdf_derive (passphrase, strlen (passphrase),
                                  GCRY_KDF_ITERSALTED_S2K,
                                  GCRY_MD_SHA1, "saltsalt", 8, s2kcount,
                                  sizeof keybuf, keybuf);
           if (err)
             die ("gcry_kdf_derive failed: %s\n", gpg_strerror (err));
           dummy_consumer (keybuf, sizeof keybuf);
           done++;
         }
       stop_timer ();
 
       elapsed = elapsed_time (repetitions);
 
       /* The repetition count is adjusted at most once.  */
       if (second_pass)
         break;
 
       reading = atoi (elapsed);
       if (!reading)
         repetitions = 10000;
       else if (reading < 10)
         repetitions = 100;
       else
         break;
     }
 
   printf ("%s\n", elapsed);
 }
 
 
 static void
 check_openpgp (void)
 {
   /* Test vectors manually created with gpg 1.4 derived code: In
      passphrase.c:hash_passphrase, add this code to the end of the
      function:
 
        ===8<===
        printf ("{\n"
                "  \"");
        for (i=0; i < pwlen; i++)
          {
            if (i && !(i%16))
              printf ("\"\n  \"");
            printf ("\\x%02x", ((const unsigned char *)pw)[i]);
          }
        printf ("\", %d,\n", pwlen);
 
        printf ("  %s, %s,\n",
                s2k->mode == 0? "GCRY_KDF_SIMPLE_S2K":
                s2k->mode == 1? "GCRY_KDF_SALTED_S2K":
                s2k->mode == 3? "GCRY_KDF_ITERSALTED_S2K":"?",
                s2k->hash_algo == DIGEST_ALGO_MD5   ? "GCRY_MD_MD5" :
                s2k->hash_algo == DIGEST_ALGO_SHA1  ? "GCRY_MD_SHA1" :
                s2k->hash_algo == DIGEST_ALGO_RMD160? "GCRY_MD_RMD160" :
                s2k->hash_algo == DIGEST_ALGO_SHA256? "GCRY_MD_SHA256" :
                s2k->hash_algo == DIGEST_ALGO_SHA384? "GCRY_MD_SHA384" :
                s2k->hash_algo == DIGEST_ALGO_SHA512? "GCRY_MD_SHA512" :
                s2k->hash_algo == DIGEST_ALGO_SHA224? "GCRY_MD_SHA224" : "?");
 
        if (s2k->mode == 0)
          printf ("  NULL, 0,\n");
        else
          {
            printf ("  \"");
            for (i=0; i < 8; i++)
              printf ("\\x%02x", (unsigned int)s2k->salt[i]);
            printf ("\", %d,\n", 8);
          }
 
        if (s2k->mode == 3)
          printf ("  %lu,\n", (unsigned long)S2K_DECODE_COUNT(s2k->count));
        else
          printf ("  0,\n");
 
        printf ("  %d,\n", (int)dek->keylen);
 
        printf ("  \"");
        for (i=0; i < dek->keylen; i++)
          {
            if (i && !(i%16))
              printf ("\"\n  \"");
            printf ("\\x%02x", ((unsigned char *)dek->key)[i]);
          }
        printf ("\"\n},\n");
        ===>8===
 
      Then prepare a file x.inp with utf8 encoding:
 
        ===8<===
        0 aes    md5 1024 a
        0 aes    md5 1024 ab
        0 aes    md5 1024 abc
        0 aes    md5 1024 abcd
        0 aes    md5 1024 abcde
        0 aes    md5 1024 abcdef
        0 aes    md5 1024 abcdefg
        0 aes    md5 1024 abcdefgh
        0 aes    md5 1024 abcdefghi
        0 aes    md5 1024 abcdefghijklmno
        0 aes    md5 1024 abcdefghijklmnop
        0 aes    md5 1024 abcdefghijklmnopq
        0 aes    md5 1024 Long_sentence_used_as_passphrase
        0 aes    md5 1024 With_utf8_umlauts:äüÖß
        0 aes    sha1 1024 a
        0 aes    sha1 1024 ab
        0 aes    sha1 1024 abc
        0 aes    sha1 1024 abcd
        0 aes    sha1 1024 abcde
        0 aes    sha1 1024 abcdef
        0 aes    sha1 1024 abcdefg
        0 aes    sha1 1024 abcdefgh
        0 aes    sha1 1024 abcdefghi
        0 aes    sha1 1024 abcdefghijklmno
        0 aes    sha1 1024 abcdefghijklmnop
        0 aes    sha1 1024 abcdefghijklmnopq
        0 aes    sha1 1024 abcdefghijklmnopqr
        0 aes    sha1 1024 abcdefghijklmnopqrs
        0 aes    sha1 1024 abcdefghijklmnopqrst
        0 aes    sha1 1024 abcdefghijklmnopqrstu
        0 aes    sha1 1024 Long_sentence_used_as_passphrase
        0 aes256 sha1 1024 Long_sentence_used_as_passphrase
        0 aes    sha1 1024 With_utf8_umlauts:äüÖß
        3 aes    sha1 1024 a
        3 aes    sha1 1024 ab
        3 aes    sha1 1024 abc
        3 aes    sha1 1024 abcd
        3 aes    sha1 1024 abcde
        3 aes    sha1 1024 abcdef
        3 aes    sha1 1024 abcdefg
        3 aes    sha1 1024 abcdefgh
        3 aes    sha1 1024 abcdefghi
        3 aes    sha1 1024 abcdefghijklmno
        3 aes    sha1 1024 abcdefghijklmnop
        3 aes    sha1 1024 abcdefghijklmnopq
        3 aes    sha1 1024 abcdefghijklmnopqr
        3 aes    sha1 1024 abcdefghijklmnopqrs
        3 aes    sha1 1024 abcdefghijklmnopqrst
        3 aes    sha1 1024 abcdefghijklmnopqrstu
        3 aes    sha1 1024 With_utf8_umlauts:äüÖß
        3 aes    sha1 1024 Long_sentence_used_as_passphrase
        3 aes    sha1 10240 Long_sentence_used_as_passphrase
        3 aes    sha1 102400 Long_sentence_used_as_passphrase
        3 aes192 sha1 1024 a
        3 aes192 sha1 1024 abcdefg
        3 aes192 sha1 1024 abcdefghi
        3 aes192 sha1 1024 abcdefghi
        3 aes192 sha1 1024 Long_sentence_used_as_passphrase
        3 aes256 sha1 1024 a
        3 aes256 sha1 1024 abcdefg
        3 aes256 sha1 1024 abcdefghi
        3 aes256 sha1 1024 abcdefghi
        3 aes256 sha1 1024 Long_sentence_used_as_passphrase
        0 aes    sha256 1024 Long_sentence_used_as_passphrase
        1 aes    sha256 1024 Long_sentence_used_as_passphrase
        3 aes    sha256 1024 Long_sentence_used_as_passphrase
        3 aes    sha256 10240 Long_sentence_used_as_passphrase
        3 aes    sha384 1024 Long_sentence_used_as_passphrase
        3 aes    sha512 1024 Long_sentence_used_as_passphrase
        3 aes256 sha512 1024 Long_sentence_used_as_passphrase
        3 3des   sha512 1024 Long_sentence_used_as_passphrase
        ===>8===
 
     and finally using a proper utf-8 enabled shell, run:
 
        cat x.inp | while read mode cipher digest count pass dummy; do \
          ./gpg </dev/null -o /dev/null -c  --passphrase "$pass" \
            --s2k-mode $mode --s2k-digest $digest --s2k-count $count \
            --cipher-algo $cipher ; done >x.out
   */
   static struct {
     const char *p;   /* Passphrase.  */
     size_t plen;     /* Length of P. */
     int algo;
     int hashalgo;
     const char *salt;
     size_t saltlen;
     unsigned long c; /* Iterations.  */
     int dklen;       /* Requested key length.  */
     const char *dk;  /* Derived key.  */
     int disabled;
   } tv[] = {
     {
       "\x61", 1,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x0c\xc1\x75\xb9\xc0\xf1\xb6\xa8\x31\xc3\x99\xe2\x69\x77\x26\x61"
     },
     {
       "\x61\x62", 2,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x18\x7e\xf4\x43\x61\x22\xd1\xcc\x2f\x40\xdc\x2b\x92\xf0\xeb\xa0"
     },
     {
       "\x61\x62\x63", 3,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x90\x01\x50\x98\x3c\xd2\x4f\xb0\xd6\x96\x3f\x7d\x28\xe1\x7f\x72"
     },
     {
       "\x61\x62\x63\x64", 4,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\xe2\xfc\x71\x4c\x47\x27\xee\x93\x95\xf3\x24\xcd\x2e\x7f\x33\x1f"
     },
     {
       "\x61\x62\x63\x64\x65", 5,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\xab\x56\xb4\xd9\x2b\x40\x71\x3a\xcc\x5a\xf8\x99\x85\xd4\xb7\x86"
     },
     {
       "\x61\x62\x63\x64\x65\x66", 6,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\xe8\x0b\x50\x17\x09\x89\x50\xfc\x58\xaa\xd8\x3c\x8c\x14\x97\x8e"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67", 7,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x7a\xc6\x6c\x0f\x14\x8d\xe9\x51\x9b\x8b\xd2\x64\x31\x2c\x4d\x64"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68", 8,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\xe8\xdc\x40\x81\xb1\x34\x34\xb4\x51\x89\xa7\x20\xb7\x7b\x68\x18"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x8a\xa9\x9b\x1f\x43\x9f\xf7\x12\x93\xe9\x53\x57\xba\xc6\xfd\x94"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f", 15,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x8a\x73\x19\xdb\xf6\x54\x4a\x74\x22\xc9\xe2\x54\x52\x58\x0e\xa5"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70", 16,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x1d\x64\xdc\xe2\x39\xc4\x43\x7b\x77\x36\x04\x1d\xb0\x89\xe1\xb9"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71", 17,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x9a\x8d\x98\x45\xa6\xb4\xd8\x2d\xfc\xb2\xc2\xe3\x51\x62\xc8\x30"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x35\x2a\xf0\xfc\xdf\xe9\xbb\x62\x16\xfc\x99\x9d\x8d\x58\x05\xcb"
     },
     {
       "\x57\x69\x74\x68\x5f\x75\x74\x66\x38\x5f\x75\x6d\x6c\x61\x75\x74"
       "\x73\x3a\xc3\xa4\xc3\xbc\xc3\x96\xc3\x9f", 26,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_MD5,
       NULL, 0,
       0,
       16,
       "\x21\xa4\xeb\xd8\xfd\xf0\x59\x25\xd1\x32\x31\xdb\xe7\xf2\x13\x5d"
     },
     {
       "\x61", 1,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x86\xf7\xe4\x37\xfa\xa5\xa7\xfc\xe1\x5d\x1d\xdc\xb9\xea\xea\xea"
     },
     {
       "\x61\x62", 2,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xda\x23\x61\x4e\x02\x46\x9a\x0d\x7c\x7b\xd1\xbd\xab\x5c\x9c\x47"
     },
     {
       "\x61\x62\x63", 3,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xa9\x99\x3e\x36\x47\x06\x81\x6a\xba\x3e\x25\x71\x78\x50\xc2\x6c"
     },
     {
       "\x61\x62\x63\x64", 4,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x81\xfe\x8b\xfe\x87\x57\x6c\x3e\xcb\x22\x42\x6f\x8e\x57\x84\x73"
     },
     {
       "\x61\x62\x63\x64\x65", 5,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x03\xde\x6c\x57\x0b\xfe\x24\xbf\xc3\x28\xcc\xd7\xca\x46\xb7\x6e"
     },
     {
       "\x61\x62\x63\x64\x65\x66", 6,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x1f\x8a\xc1\x0f\x23\xc5\xb5\xbc\x11\x67\xbd\xa8\x4b\x83\x3e\x5c"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67", 7,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x2f\xb5\xe1\x34\x19\xfc\x89\x24\x68\x65\xe7\xa3\x24\xf4\x76\xec"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68", 8,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x42\x5a\xf1\x2a\x07\x43\x50\x2b\x32\x2e\x93\xa0\x15\xbc\xf8\x68"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xc6\x3b\x19\xf1\xe4\xc8\xb5\xf7\x6b\x25\xc4\x9b\x8b\x87\xf5\x7d"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f", 15,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x29\x38\xdc\xc2\xe3\xaa\x77\x98\x7c\x7e\x5d\x4a\x0f\x26\x96\x67"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70", 16,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x14\xf3\x99\x52\x88\xac\xd1\x89\xe6\xe5\x0a\x7a\xf4\x7e\xe7\x09"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71", 17,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xd8\x3d\x62\x1f\xcd\x2d\x4d\x29\x85\x54\x70\x43\xa7\xa5\xfd\x4d"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72", 18,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xe3\x81\xfe\x42\xc5\x7e\x48\xa0\x82\x17\x86\x41\xef\xfd\x1c\xb9"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73", 19,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x89\x3e\x69\xff\x01\x09\xf3\x45\x9c\x42\x43\x01\x3b\x3d\xe8\xb1"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73\x74", 20,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x14\xa2\x3a\xd7\x0f\x2a\x5d\xd7\x25\x57\x5d\xe6\xc4\x3e\x1c\xdd"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73\x74\x75", 21,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xec\xa9\x86\xb9\x5d\x58\x7f\x34\xd7\x1c\xa7\x75\x2a\x4e\x00\x10"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\x3e\x1b\x9a\x50\x7d\x6e\x9a\xd8\x93\x64\x96\x7a\x3f\xcb\x27\x3f"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       32,
       "\x3e\x1b\x9a\x50\x7d\x6e\x9a\xd8\x93\x64\x96\x7a\x3f\xcb\x27\x3f"
       "\xc3\x7b\x3a\xb2\xef\x4d\x68\xaa\x9c\xd7\xe4\x88\xee\xd1\x5e\x70"
     },
     {
       "\x57\x69\x74\x68\x5f\x75\x74\x66\x38\x5f\x75\x6d\x6c\x61\x75\x74"
       "\x73\x3a\xc3\xa4\xc3\xbc\xc3\x96\xc3\x9f", 26,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA1,
       NULL, 0,
       0,
       16,
       "\xe0\x4e\x1e\xe3\xad\x0b\x49\x7c\x7a\x5f\x37\x3b\x4d\x90\x3c\x2e"
     },
     {
       "\x61", 1,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x6d\x47\xe3\x68\x5d\x2c\x36\x16", 8,
       1024,
       16,
       "\x41\x9f\x48\x6e\xbf\xe6\xdd\x05\x9a\x72\x23\x17\x44\xd8\xd3\xf3"
     },
     {
       "\x61\x62", 2,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x7c\x34\x78\xfb\x28\x2d\x25\xc7", 8,
       1024,
       16,
       "\x0a\x9d\x09\x06\x43\x3d\x4f\xf9\x87\xd6\xf7\x48\x90\xde\xd1\x1c"
     },
     {
       "\x61\x62\x63", 3,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xc3\x16\x37\x2e\x27\xf6\x9f\x6f", 8,
       1024,
       16,
       "\xf8\x27\xa0\x07\xc6\xcb\xdd\xf1\xfe\x5c\x88\x3a\xfc\xcd\x84\x4d"
     },
     {
       "\x61\x62\x63\x64", 4,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xf0\x0c\x73\x38\xb7\xc3\xd5\x14", 8,
       1024,
       16,
       "\x9b\x5f\x26\xba\x52\x3b\xcd\xd9\xa5\x2a\xef\x3c\x03\x4d\xd1\x52"
     },
     {
       "\x61\x62\x63\x64\x65", 5,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xe1\x7d\xa2\x36\x09\x59\xee\xc5", 8,
       1024,
       16,
       "\x94\x9d\x5b\x1a\x5a\x66\x8c\xfa\x8f\x6f\x22\xaf\x8b\x60\x9f\xaf"
     },
     {
       "\x61\x62\x63\x64\x65\x66", 6,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xaf\xa7\x0c\x68\xdf\x7e\xaa\x27", 8,
       1024,
       16,
       "\xe5\x38\xf4\x39\x62\x27\xcd\xcc\x91\x37\x7f\x1b\xdc\x58\x64\x27"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67", 7,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x40\x57\xb2\x9d\x5f\xbb\x11\x4f", 8,
       1024,
       16,
       "\xad\xa2\x33\xd9\xdd\xe0\xfb\x94\x8e\xcc\xec\xcc\xb3\xa8\x3a\x9e"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68", 8,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x38\xf5\x65\xc5\x0f\x8c\x19\x61", 8,
       1024,
       16,
       "\xa0\xb0\x3e\x29\x76\xe6\x8f\xa0\xd8\x34\x8f\xa4\x2d\xfd\x65\xee"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xc3\xb7\x99\xcc\xda\x2d\x05\x7b", 8,
       1024,
       16,
       "\x27\x21\xc8\x99\x5f\xcf\x20\xeb\xf2\xd9\xff\x6a\x69\xff\xad\xe8"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f", 15,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x7d\xd8\x68\x8a\x1c\xc5\x47\x22", 8,
       1024,
       16,
       "\x0f\x96\x7a\x12\x23\x54\xf6\x92\x61\x67\x07\xb4\x68\x17\xb8\xaa"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70", 16,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x8a\x95\xd4\x88\x0b\xb8\xe9\x9d", 8,
       1024,
       16,
       "\xcc\xe4\xc8\x82\x53\x32\xf1\x93\x5a\x00\xd4\x7f\xd4\x46\xfa\x07"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71", 17,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xb5\x22\x48\xa6\xc4\xad\x74\x67", 8,
       1024,
       16,
       "\x0c\xe3\xe0\xee\x3d\x8f\x35\xd2\x35\x14\x14\x29\x0c\xf1\xe3\x34"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72", 18,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xac\x9f\x04\x63\x83\x0e\x3c\x95", 8,
       1024,
       16,
       "\x49\x0a\x04\x68\xa8\x2a\x43\x6f\xb9\x73\x94\xb4\x85\x9a\xaa\x0e"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73", 19,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x03\x6f\x60\x30\x3a\x19\x61\x0d", 8,
       1024,
       16,
       "\x15\xe5\x9b\xbf\x1c\xf0\xbe\x74\x95\x1a\xb2\xc4\xda\x09\xcd\x99"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73\x74", 20,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x51\x40\xa5\x57\xf5\x28\xfd\x03", 8,
       1024,
       16,
       "\xa6\xf2\x7e\x6b\x30\x4d\x8d\x67\xd4\xa2\x7f\xa2\x57\x27\xab\x96"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
       "\x71\x72\x73\x74\x75", 21,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x4c\xf1\x10\x11\x04\x70\xd3\x6e", 8,
       1024,
       16,
       "\x2c\x50\x79\x8d\x83\x23\xac\xd6\x22\x29\x37\xaf\x15\x0d\xdd\x8f"
     },
     {
       "\x57\x69\x74\x68\x5f\x75\x74\x66\x38\x5f\x75\x6d\x6c\x61\x75\x74"
       "\x73\x3a\xc3\xa4\xc3\xbc\xc3\x96\xc3\x9f", 26,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xfe\x3a\x25\xcb\x78\xef\xe1\x21", 8,
       1024,
       16,
       "\x2a\xb0\x53\x08\xf3\x2f\xd4\x6e\xeb\x01\x49\x5d\x87\xf6\x27\xf6"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x04\x97\xd0\x02\x6a\x44\x2d\xde", 8,
       1024,
       16,
       "\x57\xf5\x70\x41\xa0\x9b\x8c\x09\xca\x74\xa9\x22\xa5\x82\x2d\x17"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xdd\xf3\x31\x7c\xce\xf4\x81\x26", 8,
       10240,
       16,
       "\xc3\xdd\x01\x6d\xaf\xf6\x58\xc8\xd7\x79\xb4\x40\x00\xb5\xe8\x0b"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x95\xd6\x72\x4e\xfb\xe1\xc3\x1a", 8,
       102400,
       16,
       "\xf2\x3f\x36\x7f\xb4\x6a\xd0\x3a\x31\x9e\x65\x11\x8e\x2b\x99\x9b"
     },
     {
       "\x61", 1,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x6d\x69\x15\x18\xe4\x13\x42\x82", 8,
       1024,
       24,
       "\x28\x0c\x7e\xf2\x31\xf6\x1c\x6b\x5c\xef\x6a\xd5\x22\x64\x97\x91"
       "\xe3\x5e\x37\xfd\x50\xe2\xfc\x6c"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67", 7,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x9b\x76\x5e\x81\xde\x13\xdf\x15", 8,
       1024,
       24,
       "\x91\x1b\xa1\xc1\x7b\x4f\xc3\xb1\x80\x61\x26\x08\xbe\x53\xe6\x50"
       "\x40\x6f\x28\xed\xc6\xe6\x67\x55"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x7a\xac\xcc\x6e\x15\x56\xbd\xa1", 8,
       1024,
       24,
       "\xfa\x7e\x20\x07\xb6\x47\xb0\x09\x46\xb8\x38\xfb\xa1\xaf\xf7\x75"
       "\x2a\xfa\x77\x14\x06\x54\xcb\x34"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x1c\x68\xf8\xfb\x98\xf7\x8c\x39", 8,
       1024,
       24,
       "\xcb\x1e\x86\xf5\xe0\xe4\xfb\xbf\x71\x34\x99\x24\xf4\x39\x8c\xc2"
       "\x8e\x25\x1c\x4c\x96\x47\x22\xe8"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x10\xa9\x4e\xc1\xa5\xec\x17\x52", 8,
       1024,
       24,
       "\x0f\x83\xa2\x77\x92\xbb\xe4\x58\x68\xc5\xf2\x14\x6e\x6e\x2e\x6b"
       "\x98\x17\x70\x92\x07\x44\xe0\x51"
     },
     {
       "\x61", 1,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xef\x8f\x37\x61\x8f\xab\xae\x4f", 8,
       1024,
       32,
       "\x6d\x65\xae\x86\x23\x91\x39\x98\xec\x1c\x23\x44\xb6\x0d\xad\x32"
       "\x54\x46\xc7\x23\x26\xbb\xdf\x4b\x54\x6e\xd4\xc2\xfa\xc6\x17\x17"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67", 7,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xaa\xfb\xd9\x06\x7d\x7c\x40\xaf", 8,
       1024,
       32,
       "\x7d\x10\x54\x13\x3c\x43\x7a\xb3\x54\x1f\x38\xd4\x8f\x70\x0a\x09"
       "\xe2\xfa\xab\x97\x9a\x70\x16\xef\x66\x68\xca\x34\x2e\xce\xfa\x1f"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x58\x03\x4f\x56\x8b\x97\xd4\x98", 8,
       1024,
       32,
       "\xf7\x40\xb1\x25\x86\x0d\x35\x8f\x9f\x91\x2d\xce\x04\xee\x5a\x04"
       "\x9d\xbd\x44\x23\x4c\xa6\xbb\xab\xb0\xd0\x56\x82\xa9\xda\x47\x16"
     },
     {
       "\x61\x62\x63\x64\x65\x66\x67\x68\x69", 9,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\x5d\x41\x3d\xa3\xa7\xfc\x5d\x0c", 8,
       1024,
       32,
       "\x4c\x7a\x86\xed\x81\x8a\x94\x99\x7d\x4a\xc4\xf7\x1c\xf8\x08\xdb"
       "\x09\x35\xd9\xa3\x2d\x22\xde\x32\x2d\x74\x38\xe5\xc8\xf2\x50\x6e"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA1,
       "\xca\xa7\xdc\x59\xce\x31\xe7\x49", 8,
       1024,
       32,
       "\x67\xe9\xd6\x29\x49\x1c\xb6\xa0\x85\xe8\xf9\x8b\x85\x47\x3a\x7e"
       "\xa7\xee\x89\x52\x6f\x19\x00\x53\x93\x07\x0a\x8b\xb9\xa8\x86\x94"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_SIMPLE_S2K, GCRY_MD_SHA256,
       NULL, 0,
       0,
       16,
       "\x88\x36\x78\x6b\xd9\x5a\x62\xff\x47\xd3\xfb\x79\xc9\x08\x70\x56"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_SALTED_S2K, GCRY_MD_SHA256,
       "\x05\x8b\xfe\x31\xaa\xf3\x29\x11", 8,
       0,
       16,
       "\xb2\x42\xfe\x5e\x09\x02\xd9\x62\xb9\x35\xf3\xa8\x43\x80\x9f\xb1"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA256,
       "\xd3\x4a\xea\xc9\x97\x1b\xcc\x83", 8,
       1024,
       16,
       "\x35\x37\x99\x62\x07\x26\x68\x23\x05\x47\xb2\xa0\x0b\x2b\x2b\x8d"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA256,
       "\x5e\x71\xbd\x00\x5f\x96\xc4\x23", 8,
       10240,
       16,
       "\xa1\x6a\xee\xba\xde\x73\x25\x25\xd1\xab\xa0\xc5\x7e\xc6\x39\xa7"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA384,
       "\xc3\x08\xeb\x17\x62\x08\x89\xef", 8,
       1024,
       16,
       "\x9b\x7f\x0c\x81\x6f\x71\x59\x9b\xd5\xf6\xbf\x3a\x86\x20\x16\x33"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA512,
       "\xe6\x7d\x13\x6b\x39\xe3\x44\x05", 8,
       1024,
       16,
       "\xc8\xcd\x4b\xa4\xf3\xf1\xd5\xb0\x59\x06\xf0\xbb\x89\x34\x6a\xad"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA512,
       "\xed\x7d\x30\x47\xe4\xc3\xf8\xb6", 8,
       1024,
       32,
       "\x89\x7a\xef\x70\x97\xe7\x10\xdb\x75\xcc\x20\x22\xab\x7b\xf3\x05"
       "\x4b\xb6\x2e\x17\x11\x9f\xd6\xeb\xbf\xdf\x4d\x70\x59\xf0\xf9\xe5"
     },
     {
       "\x4c\x6f\x6e\x67\x5f\x73\x65\x6e\x74\x65\x6e\x63\x65\x5f\x75\x73"
       "\x65\x64\x5f\x61\x73\x5f\x70\x61\x73\x73\x70\x68\x72\x61\x73\x65", 32,
       GCRY_KDF_ITERSALTED_S2K, GCRY_MD_SHA512,
       "\xbb\x1a\x45\x30\x68\x62\x6d\x63", 8,
       1024,
       24,
       "\xde\x5c\xb8\xd5\x75\xf6\xad\x69\x5b\xc9\xf6\x2f\xba\xeb\xfb\x36"
       "\x34\xf2\xb8\xee\x3b\x37\x21\xb7"
     }
   };
   int tvidx;
   gpg_error_t err;
   unsigned char outbuf[32];
   int i;
 
   for (tvidx=0; tvidx < DIM(tv); tvidx++)
     {
       if (tv[tvidx].disabled)
         continue;
       /* MD5 isn't supported in fips mode */
       if (in_fips_mode && tv[tvidx].hashalgo == GCRY_MD_MD5)
         continue;
       if (gcry_md_test_algo (tv[tvidx].hashalgo) != 0)
         continue;
       if (verbose)
         fprintf (stderr, "checking S2K test vector %d\n", tvidx);
       assert (tv[tvidx].dklen <= sizeof outbuf);
       err = gcry_kdf_derive (tv[tvidx].p, tv[tvidx].plen,
                              tv[tvidx].algo, tv[tvidx].hashalgo,
                              tv[tvidx].salt, tv[tvidx].saltlen,
                              tv[tvidx].c, tv[tvidx].dklen, outbuf);
       if (err)
         fail ("s2k test %d failed: %s\n", tvidx, gpg_strerror (err));
       else if (memcmp (outbuf, tv[tvidx].dk, tv[tvidx].dklen))
         {
           fail ("s2k test %d failed: mismatch\n", tvidx);
           fputs ("got:", stderr);
           for (i=0; i < tv[tvidx].dklen; i++)
             fprintf (stderr, " %02x", outbuf[i]);
           putc ('\n', stderr);
         }
     }
 }
 
 
 /* Derive a key with GCRY_KDF_PBKDF2 for every enabled entry of the
    vector table below and report each error or mismatch through fail().
    Entries whose digest algorithm is rejected by gcry_md_test_algo are
    skipped silently.  */
 static void
 check_pbkdf2 (void)
 {
   /* Test vectors are from RFC-6070.  */
   static struct pbkdf2_tv {
     const char *p;   /* Passphrase.  */
     size_t plen;     /* Length of P. */
     const char *salt;
     size_t saltlen;
     int hashalgo;
     unsigned long c; /* Iterations.  */
     int dklen;       /* Requested key length.  */
     const char *dk;  /* Derived key.  */
     int disabled;
   } tv[] = {
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_SHA1,
       1,
       20,
       "\x0c\x60\xc8\x0f\x96\x1f\x0e\x71\xf3\xa9"
       "\xb5\x24\xaf\x60\x12\x06\x2f\xe0\x37\xa6"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_SHA1,
       1,
       10, /* too short dklen for FIPS */
       "\x0c\x60\xc8\x0f\x96\x1f\x0e\x71\xf3\xa9"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_SHA1,
       2,
       20,
       "\xea\x6c\x01\x4d\xc7\x2d\x6f\x8c\xcd\x1e"
       "\xd9\x2a\xce\x1d\x41\xf0\xd8\xde\x89\x57"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_SHA1,
       4096,
       20,
       "\x4b\x00\x79\x01\xb7\x65\x48\x9a\xbe\xad"
       "\x49\xd9\x26\xf7\x21\xd0\x65\xa4\x29\xc1"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_SHA1,
       16777216,
       20,
       "\xee\xfe\x3d\x61\xcd\x4d\xa4\xe4\xe9\x94"
       "\x5b\x3d\x6b\xa2\x15\x8c\x26\x34\xe9\x84",
       1 /* This test takes too long.  */
     },
     {
       "passwordPASSWORDpassword", 24,
       "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
       GCRY_MD_SHA1,
       4096,
       25,
       "\x3d\x2e\xec\x4f\xe4\x1c\x84\x9b\x80\xc8"
       "\xd8\x36\x62\xc0\xe4\x4a\x8b\x29\x1a\x96"
       "\x4c\xf2\xf0\x70\x38"
     },
     {
       "pass\0word", 9,
       "sa\0lt", 5,
       GCRY_MD_SHA1,
       4096,
       16,
       "\x56\xfa\x6a\xa7\x55\x48\x09\x9d\xcc\x37"
       "\xd7\xf0\x34\x25\xe0\xc3"
     },
     { /* empty password test, not in RFC-6070 */
       "", 0,
       "salt", 4,
       GCRY_MD_SHA1,
       2,
       20,
       "\x13\x3a\x4c\xe8\x37\xb4\xd2\x52\x1e\xe2"
       "\xbf\x03\xe1\x1c\x71\xca\x79\x4e\x07\x97"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_GOSTR3411_CP,
       1,
       32,
       "\x73\x14\xe7\xc0\x4f\xb2\xe6\x62\xc5\x43\x67\x42\x53\xf6\x8b\xd0"
       "\xb7\x34\x45\xd0\x7f\x24\x1b\xed\x87\x28\x82\xda\x21\x66\x2d\x58"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_GOSTR3411_CP,
       2,
       32,
       "\x99\x0d\xfa\x2b\xd9\x65\x63\x9b\xa4\x8b\x07\xb7\x92\x77\x5d\xf7"
       "\x9f\x2d\xb3\x4f\xef\x25\xf2\x74\x37\x88\x72\xfe\xd7\xed\x1b\xb3"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_GOSTR3411_CP,
       4096,
       32,
       "\x1f\x18\x29\xa9\x4b\xdf\xf5\xbe\x10\xd0\xae\xb3\x6a\xf4\x98\xe7"
       "\xa9\x74\x67\xf3\xb3\x11\x16\xa5\xa7\xc1\xaf\xff\x9d\xea\xda\xfe"
     },
     /* { -- takes too long (4-5 min) to calculate
       "password", 8,
       "salt", 4,
       GCRY_MD_GOSTR3411_CP,
       16777216,
       32,
       "\xa5\x7a\xe5\xa6\x08\x83\x96\xd1\x20\x85\x0c\x5c\x09\xde\x0a\x52"
       "\x51\x00\x93\x8a\x59\xb1\xb5\xc3\xf7\x81\x09\x10\xd0\x5f\xcd\x97"
     }, */
     {
       "passwordPASSWORDpassword", 24,
       "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
       GCRY_MD_GOSTR3411_CP,
       4096,
       40,
       "\x78\x83\x58\xc6\x9c\xb2\xdb\xe2\x51\xa7\xbb\x17\xd5\xf4\x24\x1f"
       "\x26\x5a\x79\x2a\x35\xbe\xcd\xe8\xd5\x6f\x32\x6b\x49\xc8\x50\x47"
       "\xb7\x63\x8a\xcb\x47\x64\xb1\xfd"
     },
     {
       "pass\0word", 9,
       "sa\0lt", 5,
       GCRY_MD_GOSTR3411_CP,
       4096,
       20,
       "\x43\xe0\x6c\x55\x90\xb0\x8c\x02\x25\x24"
       "\x23\x73\x12\x7e\xdf\x9c\x8e\x9c\x32\x91"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_STRIBOG512,
       1,
       64,
       "\x64\x77\x0a\xf7\xf7\x48\xc3\xb1\xc9\xac\x83\x1d\xbc\xfd\x85\xc2"
       "\x61\x11\xb3\x0a\x8a\x65\x7d\xdc\x30\x56\xb8\x0c\xa7\x3e\x04\x0d"
       "\x28\x54\xfd\x36\x81\x1f\x6d\x82\x5c\xc4\xab\x66\xec\x0a\x68\xa4"
       "\x90\xa9\xe5\xcf\x51\x56\xb3\xa2\xb7\xee\xcd\xdb\xf9\xa1\x6b\x47"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_STRIBOG512,
       2,
       64,
       "\x5a\x58\x5b\xaf\xdf\xbb\x6e\x88\x30\xd6\xd6\x8a\xa3\xb4\x3a\xc0"
       "\x0d\x2e\x4a\xeb\xce\x01\xc9\xb3\x1c\x2c\xae\xd5\x6f\x02\x36\xd4"
       "\xd3\x4b\x2b\x8f\xbd\x2c\x4e\x89\xd5\x4d\x46\xf5\x0e\x47\xd4\x5b"
       "\xba\xc3\x01\x57\x17\x43\x11\x9e\x8d\x3c\x42\xba\x66\xd3\x48\xde"
     },
     {
       "password", 8,
       "salt", 4,
       GCRY_MD_STRIBOG512,
       4096,
       64,
       "\xe5\x2d\xeb\x9a\x2d\x2a\xaf\xf4\xe2\xac\x9d\x47\xa4\x1f\x34\xc2"
       "\x03\x76\x59\x1c\x67\x80\x7f\x04\x77\xe3\x25\x49\xdc\x34\x1b\xc7"
       "\x86\x7c\x09\x84\x1b\x6d\x58\xe2\x9d\x03\x47\xc9\x96\x30\x1d\x55"
       "\xdf\x0d\x34\xe4\x7c\xf6\x8f\x4e\x3c\x2c\xda\xf1\xd9\xab\x86\xc3"
     },
     /* { -- takes far too long
       "password", 8,
       "salt", 4,
       GCRY_MD_STRIBOG512,
       16777216,
       64,
       "\x49\xe4\x84\x3b\xba\x76\xe3\x00\xaf\xe2\x4c\x4d\x23\xdc\x73\x92"
       "\xde\xf1\x2f\x2c\x0e\x24\x41\x72\x36\x7c\xd7\x0a\x89\x82\xac\x36"
       "\x1a\xdb\x60\x1c\x7e\x2a\x31\x4e\x8c\xb7\xb1\xe9\xdf\x84\x0e\x36"
       "\xab\x56\x15\xbe\x5d\x74\x2b\x6c\xf2\x03\xfb\x55\xfd\xc4\x80\x71"
     }, */
     {
       "passwordPASSWORDpassword", 24,
       "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
       GCRY_MD_STRIBOG512,
       4096,
       100,
       "\xb2\xd8\xf1\x24\x5f\xc4\xd2\x92\x74\x80\x20\x57\xe4\xb5\x4e\x0a"
       "\x07\x53\xaa\x22\xfc\x53\x76\x0b\x30\x1c\xf0\x08\x67\x9e\x58\xfe"
       "\x4b\xee\x9a\xdd\xca\xe9\x9b\xa2\xb0\xb2\x0f\x43\x1a\x9c\x5e\x50"
       "\xf3\x95\xc8\x93\x87\xd0\x94\x5a\xed\xec\xa6\xeb\x40\x15\xdf\xc2"
       "\xbd\x24\x21\xee\x9b\xb7\x11\x83\xba\x88\x2c\xee\xbf\xef\x25\x9f"
       "\x33\xf9\xe2\x7d\xc6\x17\x8c\xb8\x9d\xc3\x74\x28\xcf\x9c\xc5\x2a"
       "\x2b\xaa\x2d\x3a"
     },
     {
       "pass\0word", 9,
       "sa\0lt", 5,
       GCRY_MD_STRIBOG512,
       4096,
       64,
       "\x50\xdf\x06\x28\x85\xb6\x98\x01\xa3\xc1\x02\x48\xeb\x0a\x27\xab"
       "\x6e\x52\x2f\xfe\xb2\x0c\x99\x1c\x66\x0f\x00\x14\x75\xd7\x3a\x4e"
       "\x16\x7f\x78\x2c\x18\xe9\x7e\x92\x97\x6d\x9c\x1d\x97\x08\x31\xea"
       "\x78\xcc\xb8\x79\xf6\x70\x68\xcd\xac\x19\x10\x74\x08\x44\xe8\x30"
     }
   };
   int tvidx;
   gpg_error_t err;
   unsigned char outbuf[100];
   int i;
 
   for (tvidx=0; tvidx < DIM(tv); tvidx++)
     {
       const struct pbkdf2_tv *t = &tv[tvidx];
 
       /* Skip vectors that are switched off or whose digest is not
          usable in this build/mode.  */
       if (t->disabled || gcry_md_test_algo (t->hashalgo) != 0)
         continue;
 
       if (verbose)
         fprintf (stderr, "checking PBKDF2 test vector %d algo %d\n", tvidx,
                  t->hashalgo);
 
       assert (t->dklen <= sizeof outbuf);
       err = gcry_kdf_derive (t->p, t->plen,
                              GCRY_KDF_PBKDF2, t->hashalgo,
                              t->salt, t->saltlen,
                              t->c, t->dklen, outbuf);
 
       /* The vectors after index 7 (the GOST R 34.11 and Stribog ones in
          the table above) are required to be refused in FIPS mode.  */
       if (in_fips_mode && tvidx > 7)
         {
           if (!err)
             fail ("pbkdf2 test %d unexpectedly passed in FIPS mode: %s\n",
                   tvidx, gpg_strerror (err));
           continue;
         }
 
       if (!err)
         {
           if (memcmp (outbuf, t->dk, t->dklen))
             {
               fail ("pbkdf2 test %d failed: mismatch\n", tvidx);
               fputs ("got:", stderr);
               for (i=0; i < t->dklen; i++)
                 fprintf (stderr, " %02x", outbuf[i]);
               putc ('\n', stderr);
             }
         }
       else if (in_fips_mode && (t->plen < 14 || t->dklen < 14))
         {
           /* A passphrase or requested key shorter than 14 bytes is
              accepted as a legitimate refusal in FIPS mode.  */
           if (verbose)
             fprintf (stderr,
                      "  shorter key (%u) rejected correctly in fips mode\n",
                      (unsigned int)t->plen);
         }
       else
         fail ("pbkdf2 test %d failed: %s\n", tvidx, gpg_strerror (err));
     }
 }
 
 
 /* Derive a key through gcry_kdf_derive for every enabled scrypt vector
    in the table below and report each error or mismatch through fail().
    Entries with DISABLED == 2 run only when the global DEBUG flag is
    set.  */
 static void
 check_scrypt (void)
 {
   /* Test vectors are from draft-josefsson-scrypt-kdf-01.  */
   static struct scrypt_tv {
     const char *p;        /* Passphrase.  */
     size_t plen;          /* Length of P. */
     const char *salt;
     size_t saltlen;
     int parm_n;           /* CPU/memory cost.  */
     int parm_r;           /* blocksize */
     unsigned long parm_p; /* parallelization. */
     int dklen;            /* Requested key length.  */
     const char *dk;       /* Derived key.  */
     int disabled;
   } tv[] = {
     {
       "", 0,
       "", 0,
       16,
       1,
       1,
       64,
       "\x77\xd6\x57\x62\x38\x65\x7b\x20\x3b\x19\xca\x42\xc1\x8a\x04\x97"
       "\xf1\x6b\x48\x44\xe3\x07\x4a\xe8\xdf\xdf\xfa\x3f\xed\xe2\x14\x42"
       "\xfc\xd0\x06\x9d\xed\x09\x48\xf8\x32\x6a\x75\x3a\x0f\xc8\x1f\x17"
       "\xe8\xd3\xe0\xfb\x2e\x0d\x36\x28\xcf\x35\xe2\x0c\x38\xd1\x89\x06"
     },
     {
       "password", 8,
       "NaCl", 4,
       1024,
       8,
       16,
       64,
       "\xfd\xba\xbe\x1c\x9d\x34\x72\x00\x78\x56\xe7\x19\x0d\x01\xe9\xfe"
       "\x7c\x6a\xd7\xcb\xc8\x23\x78\x30\xe7\x73\x76\x63\x4b\x37\x31\x62"
       "\x2e\xaf\x30\xd9\x2e\x22\xa3\x88\x6f\xf1\x09\x27\x9d\x98\x30\xda"
       "\xc7\x27\xaf\xb9\x4a\x83\xee\x6d\x83\x60\xcb\xdf\xa2\xcc\x06\x40"
     },
     {
       "pleaseletmein", 13,
       "SodiumChloride", 14,
       16384,
       8,
       1,
       64,
       "\x70\x23\xbd\xcb\x3a\xfd\x73\x48\x46\x1c\x06\xcd\x81\xfd\x38\xeb"
       "\xfd\xa8\xfb\xba\x90\x4f\x8e\x3e\xa9\xb5\x43\xf6\x54\x5d\xa1\xf2"
       "\xd5\x43\x29\x55\x61\x3f\x0f\xcf\x62\xd4\x97\x05\x24\x2a\x9a\xf9"
       "\xe6\x1e\x85\xdc\x0d\x65\x1e\x40\xdf\xcf\x01\x7b\x45\x57\x58\x87"
     },
     {
       "pleaseletmein", 13,
       "SodiumChloride", 14,
       1048576,
       8,
       1,
       64,
       "\x21\x01\xcb\x9b\x6a\x51\x1a\xae\xad\xdb\xbe\x09\xcf\x70\xf8\x81"
       "\xec\x56\x8d\x57\x4a\x2f\xfd\x4d\xab\xe5\xee\x98\x20\xad\xaa\x47"
       "\x8e\x56\xfd\x8f\x4b\xa5\xd0\x9f\xfa\x1c\x6d\x92\x7c\x40\xf4\xc3"
       "\x37\x30\x40\x49\xe8\xa9\x52\xfb\xcb\xf4\x5c\x6f\xa7\x7a\x41\xa4",
       2 /* Only in debug mode.  */
     }
   };
   int tvidx;
   gpg_error_t err;
   unsigned char outbuf[64];
   int i;
 
   for (tvidx=0; tvidx < DIM(tv); tvidx++)
     {
       const struct scrypt_tv *t = &tv[tvidx];
       int kdf_algo;
 
       /* A disabled vector is skipped, except that the value 2 lets it
          run when DEBUG is set.  */
       if (t->disabled && (t->disabled != 2 || !debug))
         continue;
 
       if (verbose)
         fprintf (stderr, "checking SCRYPT test vector %d\n", tvidx);
 
       assert (t->dklen <= sizeof outbuf);
 
       /* Vectors with a block size of 1 are requested through the literal
          algorithm id 41 instead of GCRY_KDF_SCRYPT.
          NOTE(review): presumably a library-internal id selecting r=1;
          confirm against the scrypt implementation.  */
       if (t->parm_r == 1)
         kdf_algo = 41;
       else
         kdf_algo = GCRY_KDF_SCRYPT;
 
       err = gcry_kdf_derive (t->p, t->plen,
                              kdf_algo,
                              t->parm_n,
                              t->salt, t->saltlen,
                              t->parm_p, t->dklen, outbuf);
       if (!err)
         {
           if (memcmp (outbuf, t->dk, t->dklen))
             {
               fail ("scrypt test %d failed: mismatch\n", tvidx);
               fputs ("got:", stderr);
               for (i=0; i < t->dklen; i++)
                 fprintf (stderr, " %02x", outbuf[i]);
               putc ('\n', stderr);
             }
         }
       else if (in_fips_mode && t->plen < 14)
         {
           /* A passphrase shorter than 14 bytes is accepted as a
              legitimate refusal in FIPS mode.  */
           if (verbose)
             fprintf (stderr,
                      "  shorter key (%u) rejected correctly in fips mode\n",
                      (unsigned int)t->plen);
         }
       else
         fail ("scrypt test %d failed: %s\n", tvidx, gpg_strerror (err));
     }
 }
 
 
 #ifdef HAVE_PTHREAD
 #include <pthread.h>
 
 #define MAX_THREADS 8
 
 /* Bookkeeping for the pthread based job callbacks used in this file.
    The MAX_THREADS slots of THREAD and WORK are used as a ring:
    pthread_jobs_launch_job fills the slot at NEXT_THREAD_IDX and
    joins the thread at OLDEST_THREAD_IDX once the ring is full.  */
 struct user_defined_threads_ctx
 {
   int oldest_thread_idx;    /* Slot of the oldest thread not yet joined;
                                -1 while no thread has been started.  */
   int next_thread_idx;      /* Slot the next launched job will use.  */
   int num_threads_running;  /* Threads created but not yet joined.  */
   pthread_attr_t attr;      /* Attributes handed to pthread_create.  */
   pthread_t thread[MAX_THREADS];  /* Thread handle per slot.  */
   struct job_thread_param
   {
     gcry_kdf_job_fn_t job;  /* Function the thread has to run.  */
     void *priv;             /* Argument passed to JOB.  */
   } work[MAX_THREADS];      /* Job description per slot; its address is
                                the argument given to job_thread.  */
 };
 
 /* Thread start routine: P points to a struct job_thread_param.  Run
    the job stored there with its private argument and then terminate
    the calling thread with a NULL exit value.  */
 static void *
 job_thread (void *p)
 {
   struct job_thread_param *slot = p;
 
   (*slot->job) (slot->priv);
   pthread_exit (NULL);
 }
 
 static int
 wait_all_jobs_completion (void *jobs_context);
 
 /* Start JOB with argument JOB_PRIV in a new thread, using the next
    free slot of the struct user_defined_threads_ctx passed as
    JOBS_CONTEXT.  If all MAX_THREADS slots are in use the oldest
    thread is joined first.  Returns 0 on success and -1 if joining or
    creating a thread failed; after a failed pthread_create all
    threads still running are waited for.  */
 static int
 pthread_jobs_launch_job (void *jobs_context, gcry_kdf_job_fn_t job,
                          void *job_priv)
 {
   struct user_defined_threads_ctx *ctx = jobs_context;
   const int slot = ctx->next_thread_idx;
   struct job_thread_param *work = &ctx->work[slot];
 
   if (slot == ctx->oldest_thread_idx)
     {
       /* The ring is full: the slot we want still belongs to the
          oldest thread, so reap that one first.  */
       assert (ctx->num_threads_running == MAX_THREADS);
       if (pthread_join (ctx->thread[slot], NULL))
         return -1;
       ctx->oldest_thread_idx = (slot + 1) % MAX_THREADS;
       ctx->num_threads_running--;
     }
 
   work->job = job;
   work->priv = job_priv;
   if (pthread_create (&ctx->thread[slot], &ctx->attr, job_thread, work))
     {
       /* No new thread could be created; wait for the others.  */
       (void)wait_all_jobs_completion (jobs_context);
       return -1;
     }
 
   /* The first thread of a round is also the oldest one.  */
   if (ctx->oldest_thread_idx < 0)
     ctx->oldest_thread_idx = slot;
   ctx->next_thread_idx = (slot + 1) % MAX_THREADS;
   ctx->num_threads_running++;
   return 0;
 }
 
 /* Join all threads recorded as running in the struct
    user_defined_threads_ctx passed as JOBS_CONTEXT, oldest first, and
    then reset the context for the next round of parallel work.
    Returns 0 on success; returns -1 as soon as a join fails, in which
    case the context is left as it was.  */
 static int
 wait_all_jobs_completion (void *jobs_context)
 {
   struct user_defined_threads_ctx *ctx = jobs_context;
   int joined = 0;
 
   while (joined < ctx->num_threads_running)
     {
       int slot = (ctx->oldest_thread_idx + joined) % MAX_THREADS;
 
       if (pthread_join (ctx->thread[slot], NULL))
         return -1;
       joined++;
     }
 
   /* Back to the "no thread started" state.  */
   ctx->oldest_thread_idx = -1;
   ctx->next_thread_idx = 0;
   ctx->num_threads_running = 0;
 
   return 0;
 }
 #endif
 
 /* Run one key derivation through the handle based KDF interface:
    gcry_kdf_open, gcry_kdf_compute, gcry_kdf_final, gcry_kdf_close.
 
    PARALLEL selects, when built with HAVE_PTHREAD, whether the compute
    step is given the pthread based job callbacks of this file (non-zero)
    or NULL (zero).  Without HAVE_PTHREAD it is ignored.  ALGO, SUBALGO,
    PARAMS/PARAMSLEN, PASS/PASSLEN, SALT/SALTLEN, KEY/KEYLEN and AD/ADLEN
    are passed unchanged to gcry_kdf_open.  OUTLEN bytes of result are
    requested into OUT.
 
    Returns 0 on success or the error code of the first step that
    failed.  The handle is closed on every path after a successful
    open.  */
 static gcry_error_t
 my_kdf_derive (int parallel,
                int algo, int subalgo,
                const unsigned long *params, unsigned int paramslen,
                const char *pass, size_t passlen,
                const char *salt, size_t saltlen,
                const char *key, size_t keylen,
                const char *ad, size_t adlen,
                size_t outlen, unsigned char *out)
 {
   gcry_error_t err;
   gcry_kdf_hd_t hd;
 
   /* Avoid an unused parameter warning in builds without HAVE_PTHREAD.  */
   (void)parallel;
 
   err = gcry_kdf_open (&hd, algo, subalgo, params, paramslen,
                        pass, passlen, salt, saltlen, key, keylen,
                        ad, adlen);
   if (err)
     return err;
 
 #ifdef HAVE_PTHREAD
   if (parallel)
     {
       struct user_defined_threads_ctx jobs_context;
       const gcry_kdf_thread_ops_t ops =
       {
         &jobs_context,
         pthread_jobs_launch_job,
         wait_all_jobs_completion
       };
       int rc;
 
       memset (&jobs_context, 0, sizeof (struct user_defined_threads_ctx));
       jobs_context.oldest_thread_idx = -1;
 
       /* The pthread_attr_* functions report failure through their
          return value and do not set errno, so convert that value
          instead of consulting errno.  */
       rc = pthread_attr_init (&jobs_context.attr);
       if (rc)
         {
           err = gpg_error_from_errno (rc);
           gcry_kdf_close (hd);
           return err;
         }
 
       rc = pthread_attr_setdetachstate (&jobs_context.attr,
                                         PTHREAD_CREATE_JOINABLE);
       if (rc)
         {
           err = gpg_error_from_errno (rc);
           pthread_attr_destroy (&jobs_context.attr);
           gcry_kdf_close (hd);
           return err;
         }
 
       err = gcry_kdf_compute (hd, &ops);
 
       pthread_attr_destroy (&jobs_context.attr);
     }
   else
 #endif
     {
       err = gcry_kdf_compute (hd, NULL);
     }
 
   /* Only fetch the result if the computation itself succeeded.  */
   if (!err)
     err = gcry_kdf_final (hd, outlen, out);
 
   gcry_kdf_close (hd);
   return err;
 }
 
 /* Run every Argon2 vector of the table below through my_kdf_derive,
    first without thread callbacks and, when built with HAVE_PTHREAD,
    a second time with them.  Errors and mismatches are reported through
    fail(); the reported test number is 2*index for the plain run and
    2*index+1 for the threaded run.  */
 static void
 check_argon2 (void)
 {
   gcry_error_t err;
   static struct {
     int subalgo;
     unsigned long param[4];
     size_t passlen;
     const char *pass;
     size_t saltlen;
     const char *salt;
     size_t keylen;
     const char *key;
     size_t adlen;
     const char *ad;
     size_t dklen;
     const char *dk;
   } tv[] = {
     {
       GCRY_KDF_ARGON2D,
       { 32, 3, 32, 4 },
       32,
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
       16,
       "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02",
       8,
       "\x03\x03\x03\x03\x03\x03\x03\x03",
       12,
       "\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04",
       32,
       "\x51\x2b\x39\x1b\x6f\x11\x62\x97\x53\x71\xd3\x09\x19\x73\x42\x94"
       "\xf8\x68\xe3\xbe\x39\x84\xf3\xc1\xa1\x3a\x4d\xb9\xfa\xbe\x4a\xcb"
     },
     {
       GCRY_KDF_ARGON2I,
       { 32, 3, 32, 4 },
       32,
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
       16,
       "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02",
       8,
       "\x03\x03\x03\x03\x03\x03\x03\x03",
       12,
       "\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04",
       32,
       "\xc8\x14\xd9\xd1\xdc\x7f\x37\xaa\x13\xf0\xd7\x7f\x24\x94\xbd\xa1"
       "\xc8\xde\x6b\x01\x6d\xd3\x88\xd2\x99\x52\xa4\xc4\x67\x2b\x6c\xe8"
     },
     {
       GCRY_KDF_ARGON2ID,
       { 32, 3, 32, 4 },
       32,
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
       "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01",
       16,
       "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02",
       8,
       "\x03\x03\x03\x03\x03\x03\x03\x03",
       12,
       "\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04",
       32,
       "\x0d\x64\x0d\xf5\x8d\x78\x76\x6c\x08\xc0\x37\xa3\x4a\x8b\x53\xc9"
       "\xd0\x1e\xf0\x45\x2d\x75\xb6\x5e\xb5\x25\x20\xe9\x6b\x01\xe6\x59"
     },
     {
       /* empty password */
       GCRY_KDF_ARGON2I,
       { 32, 3, 128, 1 },
       0, NULL,
       16,
       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
       0, NULL,
       0, NULL,
       32,
       "\xbb\x1f\xf2\xb9\x9f\xd4\x4a\xd9\xdf\x7f\xb9\x54\x55\x9e\xb8\xeb"
       "\xb5\x9d\xab\xce\x2e\x62\x9f\x9b\x89\x09\xfe\xde\x57\xcc\x63\x86"
     },
     {
       /* empty password */
       GCRY_KDF_ARGON2ID,
       { 32, 3, 128, 1 },
       0, NULL,
       16,
       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
       0, NULL,
       0, NULL,
       32,
       "\x09\x2f\x38\x35\xac\xb2\x43\x92\x93\xeb\xcd\xe8\x04\x16\x6a\x31"
       "\xce\x14\xd4\x55\xdb\xd8\xf7\xe6\xb4\xf5\x9d\x64\x8e\xd0\x3a\xdb"
     },
   };
   unsigned char out[32];
   int i;
   int count;
   int parallel;
   /* Highest value of the PARALLEL argument to exercise: the threaded
      run is only available when built with pthread support.  */
 #ifdef HAVE_PTHREAD
   const int max_parallel = 1;
 #else
   const int max_parallel = 0;
 #endif
 
   for (count = 0; count < DIM(tv); count++)
     {
       if (verbose)
         fprintf (stderr, "checking ARGON2 test vector %d\n", count);
 
       /* The derived key is written to OUT; make sure it fits.  */
       assert (tv[count].dklen <= sizeof out);
 
       for (parallel = 0; parallel <= max_parallel; parallel++)
         {
           /* Start from a cleared buffer so that the threaded run cannot
              pass merely because OUT still holds the result of the
              preceding plain run.  */
           memset (out, 0, sizeof out);
 
           err = my_kdf_derive (parallel, GCRY_KDF_ARGON2,
                                tv[count].subalgo, tv[count].param, 4,
                                tv[count].pass, tv[count].passlen,
                                tv[count].salt, tv[count].saltlen,
                                tv[count].key, tv[count].keylen,
                                tv[count].ad, tv[count].adlen,
                                tv[count].dklen, out);
           if (err)
             fail ("argon2 test %d failed: %s\n", count*2+parallel,
                   gpg_strerror (err));
           else if (memcmp (out, tv[count].dk, tv[count].dklen))
             {
               fail ("argon2 test %d failed: mismatch\n", count*2+parallel);
               fputs ("got:", stderr);
               for (i=0; i < tv[count].dklen; i++)
                 fprintf (stderr, " %02x", out[i]);
               putc ('\n', stderr);
             }
         }
     }
 }
 
 
 /* Run the Balloon KDF known-answer tests.  Each vector is derived with
    the first argument of my_kdf_derive set to 0 and, when HAVE_PTHREAD
    is defined, once more with it set to 1.  Failures are reported via
    fail() under the test number (vector index * 2 + that argument).  */
 static void
 check_balloon (void)
 {
   /* Two test vectors generated by the research prototype implementation.
      $ balloon abcdefghijklmno
      t_cost         = 1
      s_cost         = 1024
      p_cost         = 1
      passwd         = abcdefghijklmno
      Time total      : 0.0527251
      Hashes per sec  : 18.9663
      Output          : $balloon$v=1$s=1024,t=1,p=1
                        $FRzqOiIuPvuoy55vGfKzyse+2f28F7m9iFHCctnEBwg=
                        $NxOGNPyTPZzKiJjgj7H6pJDLIgR05HI7VaxJpxEao5Q=
      $ balloon -t 12 -s 4096 -p 4 Long_sentence_used_as_passphrase
      t_cost         = 12
      s_cost         = 4096
      p_cost         = 4
      passwd         = Long_sentence_used_as_passphrase
      Time total      : 3.70399
      Hashes per sec  : 0.269979
      Output          : $balloon$v=1$s=4096,t=12,p=4
                        $8Yor74EqTwBrrdaeYeSVx0VXVAgDrsILAnJWdVUy93s=
                        $FaNb9ofeWEggzhW9BUSODgZH5/awzNz5Adoub48+BgQ=
 
      Going by the outputs quoted above, param[] is laid out as
      { s_cost, t_cost, p_cost }.
    */
   static struct {
     int subalgo;
     unsigned long param[3];
     size_t passlen;
     const char *pass;
     size_t saltlen;
     const char *salt;
     size_t dklen;
     const char *dk;
   } tv[] = {
     {
       GCRY_MD_SHA256,
       { 1024, 1, 1 },
       15,
       "abcdefghijklmno",
       32,
       "\x15\x1c\xea\x3a\x22\x2e\x3e\xfb\xa8\xcb\x9e\x6f\x19\xf2\xb3\xca"
       "\xc7\xbe\xd9\xfd\xbc\x17\xb9\xbd\x88\x51\xc2\x72\xd9\xc4\x07\x08",
       32,
       "\x37\x13\x86\x34\xfc\x93\x3d\x9c\xca\x88\x98\xe0\x8f\xb1\xfa\xa4"
       "\x90\xcb\x22\x04\x74\xe4\x72\x3b\x55\xac\x49\xa7\x11\x1a\xa3\x94"
     },
     {
       GCRY_MD_SHA256,
       { 4096, 12, 4 },
       32,
       "Long_sentence_used_as_passphrase",
       32,
       "\xf1\x8a\x2b\xef\x81\x2a\x4f\x00\x6b\xad\xd6\x9e\x61\xe4\x95\xc7"
       "\x45\x57\x54\x08\x03\xae\xc2\x0b\x02\x72\x56\x75\x55\x32\xf7\x7b",
       32,
       "\x15\xa3\x5b\xf6\x87\xde\x58\x48\x20\xce\x15\xbd\x05\x44\x8e\x0e"
       "\x06\x47\xe7\xf6\xb0\xcc\xdc\xf9\x01\xda\x2e\x6f\x8f\x3e\x06\x04"
     }
   };
   unsigned char out[32];  /* Every vector above has dklen == 32.  */
   gcry_error_t err;
   int last_mode = 0;      /* Highest value used for the first argument.  */
   int mode;
   int idx;
   int k;
 
 #ifdef HAVE_PTHREAD
   last_mode = 1;
 #endif
 
   for (idx = 0; idx < DIM(tv); idx++)
     {
       if (verbose)
         fprintf (stderr, "checking Balloon test vector %d\n", idx);
 
       for (mode = 0; mode <= last_mode; mode++)
         {
           err = my_kdf_derive (mode, GCRY_KDF_BALLOON,
                                tv[idx].subalgo, tv[idx].param, 3,
                                tv[idx].pass, tv[idx].passlen,
                                tv[idx].salt, tv[idx].saltlen,
                                NULL, 0, NULL, 0, tv[idx].dklen, out);
           if (err)
             {
               fail ("balloon test %d failed: %s\n", idx*2+mode,
                     gpg_strerror (err));
               continue;
             }
           if (!memcmp (out, tv[idx].dk, tv[idx].dklen))
             continue;
 
           /* Wrong output: report it and dump what was derived.  */
           fail ("balloon test %d failed: mismatch\n", idx*2+mode);
           fputs ("got:", stderr);
           for (k=0; k < tv[idx].dklen; k++)
             fprintf (stderr, " %02x", out[k]);
           putc ('\n', stderr);
         }
     }
 }
 
 
 /* Run the known-answer tests for GCRY_KDF_ONESTEP_KDF and
    GCRY_KDF_ONESTEP_KDF_MAC.  Every vector is derived once through
    my_kdf_derive and compared with the expected key material; failures
    are reported via fail().  */
 static void
 check_onestep_kdf (void)
 {
   /* The hash based vectors carry no key (0, NULL); the MAC based ones
      carry a 16 byte key.  In every vector param[0] equals dklen.  */
   static struct {
     int algo;
     int subalgo;
     unsigned long param[1];
     size_t inputlen;
     const char *input;
     size_t otherlen;
     const char *other;
     size_t keylen;
     const char *key;
     size_t dklen;
     const char *dk;
   } tv[] = {
     {
       GCRY_KDF_ONESTEP_KDF, GCRY_MD_SHA256,
       { 38 },
       16,
       "\x3f\x89\x2b\xd8\xb8\x4d\xae\x64\xa7\x82\xa3\x5f\x6e\xaa\x8f\x00",
       12,
       "\xec\x3f\x1c\xd8\x73\xd2\x88\x58\xa5\x8c\xc3\x9e",
       0, NULL,
       38,
       "\xa7\xc0\x66\x52\x98\x25\x25\x31\xe0\xdb\x37\x73\x7a\x37\x46\x51"
       "\xb3\x68\x27\x5f\x20\x48\x28\x4d\x16\xa1\x66\xc6\xd8\xa9\x0a\x91"
       "\xa4\x91\xc1\x6f\x49\x64"
     },
     {
       GCRY_KDF_ONESTEP_KDF, GCRY_MD_SHA512,
       { 68 },
       16,
       "\xe6\x5b\x19\x05\x87\x8b\x95\xf6\x8b\x55\x35\xbd\x3b\x2b\x10\x13",
       12,
       "\x83\x02\x21\xb1\x73\x0d\x91\x76\xf8\x07\xd4\x07",
       0, NULL,
       68,
       "\xb8\xc4\x4b\xdf\x0b\x85\xa6\x4b\x6a\x51\xc1\x2a\x06\x71\x0e\x37"
       "\x3d\x82\x9b\xb1\xfd\xa5\xb4\xe1\xa2\x07\x95\xc6\x19\x95\x94\xf6"
       "\xfa\x65\x19\x8a\x72\x12\x57\xf7\xd5\x8c\xb2\xf6\xf6\xdb\x9b\xb5"
       "\x69\x9f\x73\x86\x30\x45\x90\x90\x54\xb2\x38\x9e\x06\xec\x00\xfe"
       "\x31\x8c\xab\xd9"
     },
     {
       GCRY_KDF_ONESTEP_KDF_MAC, GCRY_MAC_HMAC_SHA256,
       { 44 },
       16,
       "\x02\xb4\x0d\x33\xe3\xf6\x85\xae\xae\x67\x7a\xc3\x44\xee\xaf\x77",
       12,
       "\xc6\x7c\x38\x95\x80\x12\x8f\x18\xf6\xcf\x85\x92",
       16,
       "\x0a\xd5\x2c\x93\x57\xc8\x5e\x47\x81\x29\x6a\x36\xca\x72\x03\x9c",
       44,
       "\xbe\x32\xe7\xd3\x06\xd8\x91\x02\x8b\xe0\x88\xf2\x13\xf9\xf9\x47"
       "\xc5\x04\x20\xd9\xb5\xa1\x2c\xa6\x98\x18\xdd\x99\x95\xde\xdd\x8e"
       "\x61\x37\xc7\x10\x4d\x67\xf2\xca\x90\x91\x5d\xda"
     },
     {
       GCRY_KDF_ONESTEP_KDF_MAC, GCRY_MAC_HMAC_SHA512,
       { 56 },
       16,
       "\x8e\x5c\xd5\xf6\xae\x55\x8f\xfa\x04\xcd\xa2\xfa\xd9\x4d\xd6\x16",
       12,
       "\x4a\x43\x30\x18\xe5\x1c\x09\xbb\xd6\x13\x26\xbb",
       16,
       "\x6e\xd9\x3b\x6f\xe5\xb3\x50\x2b\xb4\x2b\x4c\x0f\xcb\x13\x36\x62",
       56,
       "\x29\x5d\xfb\xeb\x54\xec\x0f\xe2\x4e\xce\x32\xf5\xb8\x7c\x85\x3e"
       "\x69\x9a\x62\xe3\x9d\x9c\x9e\xe6\xee\x78\xf8\xb9\xa0\xee\x50\xa3"
       "\x6a\x82\xe6\x06\x2c\x95\xed\x53\xbc\x36\x67\x00\xe2\xd0\xe0\x93"
       "\xbf\x75\x2e\xea\x42\x99\x47\x2e"
     },
   };
   unsigned char out[68];  /* Sized for the longest dk above (68 bytes).  */
   gcry_error_t err;
   int idx;
   int k;
 
   for (idx = 0; idx < DIM(tv); idx++)
     {
       if (verbose)
         fprintf (stderr, "checking OneStepKDF test vector %d\n", idx);
 
       err = my_kdf_derive (0, tv[idx].algo, tv[idx].subalgo,
                            tv[idx].param, 1,
                            tv[idx].input, tv[idx].inputlen, NULL, 0,
                            tv[idx].key, tv[idx].keylen,
                            tv[idx].other, tv[idx].otherlen,
                            tv[idx].dklen, out);
       if (err)
         {
           fail ("OneStepKDF test %d failed: %s\n", idx, gpg_strerror (err));
           continue;
         }
       if (!memcmp (out, tv[idx].dk, tv[idx].dklen))
         continue;
 
       /* Wrong output: report it and dump what was derived.  */
       fail ("OneStepKDF test %d failed: mismatch\n", idx);
       fputs ("got:", stderr);
       for (k=0; k < tv[idx].dklen; k++)
         fprintf (stderr, " %02x", out[k]);
       putc ('\n', stderr);
     }
 }
 
 
 /* Run the HKDF known-answer tests with GCRY_MAC_HMAC_SHA256.  Every
    vector is derived once through my_kdf_derive and compared with the
    expected output; failures are reported via fail().  */
 static void
 check_hkdf (void)
 {
   /* NOTE(review): these look like the SHA-256 test cases 1 to 3 of
      RFC 5869, Appendix A -- confirm against the RFC.  The last
      vector uses neither salt nor info.  In every vector param[0]
      equals dklen.  */
   static struct {
     unsigned long param[1];
     size_t inputlen;
     const char *input;
     size_t saltlen;
     const char *salt;
     size_t infolen;
     const char *info;
     size_t dklen;
     const char *dk;
   } tv[] = {
     {
       { 42 },
       22,
       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
       "\x0b\x0b\x0b\x0b\x0b\x0b",
       13,
       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
       10,
       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
       42,
       "\x3c\xb2\x5f\x25\xfa\xac\xd5\x7a\x90\x43\x4f\x64\xd0\x36\x2f\x2a"
       "\x2d\x2d\x0a\x90\xcf\x1a\x5a\x4c\x5d\xb0\x2d\x56\xec\xc4\xc5\xbf"
       "\x34\x00\x72\x08\xd5\xb8\x87\x18\x58\x65"
     },
     {
       { 82 },
       80,
       "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
       "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
       "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
       "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
       "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
       80,
       "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
       "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
       80,
       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
       82,
       "\xb1\x1e\x39\x8d\xc8\x03\x27\xa1\xc8\xe7\xf7\x8c\x59\x6a\x49\x34"
       "\x4f\x01\x2e\xda\x2d\x4e\xfa\xd8\xa0\x50\xcc\x4c\x19\xaf\xa9\x7c"
       "\x59\x04\x5a\x99\xca\xc7\x82\x72\x71\xcb\x41\xc6\x5e\x59\x0e\x09"
       "\xda\x32\x75\x60\x0c\x2f\x09\xb8\x36\x77\x93\xa9\xac\xa3\xdb\x71"
       "\xcc\x30\xc5\x81\x79\xec\x3e\x87\xc1\x4c\x01\xd5\xc1\xf3\x43\x4f"
       "\x1d\x87"
     },
     {
       { 42 },
       22,
       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
       "\x0b\x0b\x0b\x0b\x0b\x0b",
       0, NULL,
       0, NULL,
       42,
       "\x8d\xa4\xe7\x75\xa5\x63\xc1\x8f\x71\x5f\x80\x2a\x06\x3c\x5a\x31"
       "\xb8\xa1\x1f\x5c\x5e\xe1\x87\x9e\xc3\x45\x4e\x5f\x3c\x73\x8d\x2d"
       "\x9d\x20\x13\x95\xfa\xa4\xb6\x1a\x96\xc8"
     },
   };
   unsigned char out[82];  /* Sized for the longest dk above (82 bytes).  */
   gcry_error_t err;
   int idx;
   int k;
 
   for (idx = 0; idx < DIM(tv); idx++)
     {
       if (verbose)
         fprintf (stderr, "checking HKDF test vector %d\n", idx);
 
       err = my_kdf_derive (0, GCRY_KDF_HKDF, GCRY_MAC_HMAC_SHA256,
                            tv[idx].param, 1,
                            tv[idx].input, tv[idx].inputlen, NULL, 0,
                            tv[idx].salt, tv[idx].saltlen,
                            tv[idx].info, tv[idx].infolen,
                            tv[idx].dklen, out);
       if (err)
         {
           fail ("HKDF test %d failed: %s\n", idx, gpg_strerror (err));
           continue;
         }
       if (!memcmp (out, tv[idx].dk, tv[idx].dklen))
         continue;
 
       /* Wrong output: report it and dump what was derived.  */
       fail ("HKDF test %d failed: mismatch\n", idx);
       fputs ("got:", stderr);
       for (k=0; k < tv[idx].dklen; k++)
         fprintf (stderr, " %02x", out[k]);
       putc ('\n', stderr);
     }
 }
 
 /* Query GCRYCTL_FIPS_SERVICE_INDICATOR_KDF for every KDF listed below
    and check the answer against the expectation: GPG_ERR_NO_ERROR for
    the algorithms in fips_kdf_algos, GPG_ERR_NOT_SUPPORTED for all
    others.  Any other error code is reported as a failure too.  */
 static void
 check_fips_indicators (void)
 {
   /* KDFs the service indicator is expected to report as approved.  */
   enum gcry_kdf_algos fips_kdf_algos[] = {
     GCRY_KDF_PBKDF2,
   };
   /* All KDFs that get queried.  */
   enum gcry_kdf_algos kdf_algos[] = {
     GCRY_KDF_SIMPLE_S2K,
     GCRY_KDF_SALTED_S2K,
     GCRY_KDF_ITERSALTED_S2K,
     GCRY_KDF_PBKDF1,
     GCRY_KDF_PBKDF2,
     GCRY_KDF_SCRYPT,
     GCRY_KDF_ARGON2,
     GCRY_KDF_BALLOON,
     GCRY_KDF_ONESTEP_KDF,
     GCRY_KDF_ONESTEP_KDF_MAC,
     GCRY_KDF_HKDF,
   };
   const size_t n_approved = sizeof(fips_kdf_algos) / sizeof(*fips_kdf_algos);
   const size_t n_algos = sizeof(kdf_algos) / sizeof(*kdf_algos);
   size_t a, f;
 
   for (a = 0; a < n_algos; a++)
     {
       gcry_error_t err = gcry_control (GCRYCTL_FIPS_SERVICE_INDICATOR_KDF,
                                        kdf_algos[a]);
       int expect_approved = 0;
       unsigned int code;
 
       if (verbose)
         fprintf (stderr, "checking FIPS indicator for KDF %d: %s\n",
                  kdf_algos[a], gcry_strerror (err));
 
       /* Look the algorithm up in the list of approved ones.  */
       for (f = 0; f < n_approved && !expect_approved; f++)
         if (kdf_algos[a] == fips_kdf_algos[f])
           expect_approved = 1;
 
       /* Only the error code part of ERR is evaluated.  */
       code = err & GPG_ERR_CODE_MASK;
       if (code == GPG_ERR_NO_ERROR)
         {
           if (!expect_approved)
             fail ("KDF algorithm %d is marked as approved by"
                   " GCRYCTL_FIPS_SERVICE_INDICATOR_KDF, but only PBKDF2 should"
                   " be marked as approved.", kdf_algos[a]);
         }
       else if (code == GPG_ERR_NOT_SUPPORTED)
         {
           if (expect_approved)
             fail ("KDF algorithm %d is marked as not approved by"
                   " GCRYCTL_FIPS_SERVICE_INDICATOR_KDF, but it should be"
                   " approved", kdf_algos[a]);
         }
       else
         fail ("Unexpected error '%s' (%d) returned by"
               " GCRYCTL_FIPS_SERVICE_INDICATOR_KDF for KDF algorithm %d",
               gcry_strerror (err), err, kdf_algos[a]);
     }
 }
 
 
 /* Entry point of the t-kdf test program.
 
    Options:
      --verbose   increment the verbosity level
      --debug     raise verbosity by two and enable debug flags
      --s2k N     instead of running the checks, call bench_s2k with N
      --help      print a usage summary to stdout and exit
      --          stop option parsing
 
    Without --s2k all KDF checks are run; the FIPS service indicator
    check is added when libgcrypt reports that FIPS mode is active.
 
    Returns 1 if error_count is non-zero, otherwise 0.  */
 int
 main (int argc, char **argv)
 {
   int last_argc = -1;
   unsigned long s2kcount = 0;
 
   /* Skip the program name.  */
   if (argc)
     { argc--; argv++; }
 
   /* Parse options; the loop ends at the first argument that does not
      consume anything, i.e. one that is not an option.  */
   while (argc && last_argc != argc )
     {
       last_argc = argc;
       if (!strcmp (*argv, "--"))
         {
           argc--; argv++;
           break;
         }
       else if (!strcmp (*argv, "--help"))
         {
           /* The usage line needs its own newline, otherwise it runs
              into the "Options:" heading.  */
           fputs ("usage: t-kdf [options]\n"
                  "Options:\n"
                  " --verbose    print timings etc.\n"
                  " --debug      flyswatter\n"
                  " --s2k        print the time needed for S2K\n",
                  stdout);
           exit (0);
         }
       else if (!strcmp (*argv, "--verbose"))
         {
           verbose++;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--debug"))
         {
           verbose += 2;
           debug++;
           argc--; argv++;
         }
       else if (!strcmp (*argv, "--s2k"))
         {
           /* Only a flag for now; the actual count is read below from
              the remaining argument.  */
           s2kcount = 1;
           argc--; argv++;
         }
       else if (!strncmp (*argv, "--", 2))
         die ("unknown option '%s'\n", *argv);
     }
 
   if (s2kcount)
     {
       /* Exactly one argument, the count, must be left.  */
       if (argc != 1)
         die ("usage: t-kdf --s2k S2KCOUNT\n");
       s2kcount = strtoul (*argv, NULL, 10);
       if (!s2kcount)
         die ("t-kdf: S2KCOUNT must be positive\n");
     }
 
   if (!gcry_check_version (GCRYPT_VERSION))
     die ("version mismatch\n");
 
   if (gcry_fips_mode_active ())
     in_fips_mode = 1;
 
   /* Secure memory is only disabled outside of FIPS mode.  */
   if (!in_fips_mode)
     xgcry_control ((GCRYCTL_DISABLE_SECMEM, 0));
 
   xgcry_control ((GCRYCTL_INITIALIZATION_FINISHED, 0));
   if (debug)
     xgcry_control ((GCRYCTL_SET_DEBUG_FLAGS, 1u, 0));
 
   if (s2kcount)
     bench_s2k (s2kcount);
   else
     {
       check_openpgp ();
       check_pbkdf2 ();
       check_scrypt ();
       check_argon2 ();
       check_balloon ();
       check_onestep_kdf ();
       check_hkdf ();
       if (in_fips_mode)
         check_fips_indicators();
     }
 
   return error_count ? 1 : 0;
 }
diff --git a/tests/testapi.c b/tests/testapi.c
index 2355859f..e1ab47da 100644
--- a/tests/testapi.c
+++ b/tests/testapi.c
@@ -1,132 +1,132 @@
 /* testapi.c - for libgcrypt
  *	Copyright (C) 2000, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <gcrypt.h>
 
 #define PGM "testapi"
 #include "t-common.h"
 
 /* Report the current source file and line on stderr and terminate the
    program with exit status 2.  */
 #define BUG() do {fprintf ( stderr, "Ooops at %s:%d\n", __FILE__ , __LINE__ );\
 		  exit(2);} while(0)
 
 /* An ElGamal public key used by test_sexp.  The members p, g and y
    are hex strings with a "0x" prefix; test_sexp converts them with
    gcry_mpi_scan (GCRYMPI_FMT_HEX) and inserts them into the p, g and
    y slots of an "elg" public-key S-expression.  */
 struct {
     const char *p,*g,*y;
 } elg_testkey1 = {
   "0x9D559F31A6D30492C383213844AEBB7772963A85D3239F3611AAB93A2A985F64FB735B9259EC326BF5720F909980D609D37C288C9223B0350FBE493C3B5AF54CA23031E952E92F8A3DBEDBC5A684993D452CD54F85B85160166FCD25BD7AB6AE9B1EB4FCC9D300DAFF081C4CBA6694906D3E3FF18196A5CCF7F0A6182962166B",
   "0x5",
   "0x9640024BB2A277205813FF685048AA27E2B192B667163E7C59E381E27003D044C700C531CE8FD4AA781B463BC9FFE74956AF09A38A098322B1CF72FC896F009E3A6BFF053D3B1D1E1994BF9CC07FA12963D782F027B51511DDE8C5F43421FBC12734A9C070F158C729A370BEE5FC51A772219438EDA8202C35FA3F5D8CD1997B"
 };
 
 /* Exercise the S-expression API with the ElGamal test key: build a
    public-key S-expression from elg_testkey1, dump it and its CDR to
    stderr, and print the key size and the length of the printed
    S-expression to stdout.  ARGC and ARGV are not used.  */
 void
 test_sexp ( int argc, char **argv )
 {
     int rc, nbits;
     int i;
     gcry_sexp_t sexp;
     gcry_mpi_t key[3];
     size_t n;
     char *buf;
 
     (void)argc;
     (void)argv;
 
     /* Convert the hex strings of the test key into MPIs.  */
     if ( gcry_mpi_scan( &key[0], GCRYMPI_FMT_HEX, elg_testkey1.p, 0, NULL ) )
 	BUG();
     if ( gcry_mpi_scan( &key[1], GCRYMPI_FMT_HEX, elg_testkey1.g, 0, NULL ) )
 	BUG();
     if ( gcry_mpi_scan( &key[2], GCRYMPI_FMT_HEX, elg_testkey1.y, 0, NULL ) )
 	BUG();
 
     /* get nbits from a key */
     rc = gcry_sexp_build ( &sexp, NULL,
                            "(public-key(elg(p%m)(g%m)(y%m)))",
                            key[0], key[1], key[2] );
 
     /* key[] is not referenced after this point; release the MPIs
        instead of leaking them.  */
     for ( i = 0; i < 3; i++ )
         gcry_mpi_release ( key[i] );
 
     fprintf (stderr, "DUMP of PK (rc=%d):\n", rc);
     gcry_sexp_dump ( sexp );
     {  gcry_sexp_t x;
        x = gcry_sexp_cdr ( sexp );
        fputs ( "DUMP of CDR:\n", stderr );
        gcry_sexp_dump ( x );
        gcry_sexp_release ( x );
     }
     nbits = gcry_pk_get_nbits( sexp );
     printf ( "elg_testkey1 - nbits=%d\n", nbits );
 
     /* First call with a NULL buffer to learn the required size, then
        print into a buffer of that size.  */
     n = gcry_sexp_sprint ( sexp, 0, NULL, 0 );
     buf = gcry_xmalloc ( n );
     n = gcry_sexp_sprint ( sexp, 0, buf, n );
     printf ( "sprint length=%u\n", (unsigned int)n );
     gcry_free ( buf );
     gcry_sexp_release( sexp );
 }
 
 
 /* Generate a 1024 bit DSA key and dump the resulting S-expression.
    Errors are reported on stderr; the function then returns without
    dumping anything.  ARGC and ARGV are not used.  */
 void
 test_genkey ( int argc, char **argv )
 {
     int rc, nbits = 1024;
     gcry_sexp_t s_parms, s_key;
 
     (void)argc;
     (void)argv;
 
     xgcry_control ( (GCRYCTL_INIT_SECMEM, 16384, 0) );
 
     /* Do not hand an invalid parameter object to gcry_pk_genkey.  */
     rc = gcry_sexp_build ( &s_parms, NULL, "(genkey(dsa(nbits %d)))", nbits );
     if ( rc ) {
 	fprintf ( stderr, "building genkey parameters failed: %s\n",
 		  gpg_strerror (rc) );
 	return;
     }
 
     rc = gcry_pk_genkey( &s_key, s_parms );
     /* The parameters are not needed anymore, whether or not the key
        generation succeeded; releasing them here avoids a leak on the
        error path.  */
     gcry_sexp_release( s_parms );
     if ( rc ) {
 	fprintf ( stderr, "genkey failed: %s\n", gpg_strerror (rc) );
 	return;
     }
     gcry_sexp_dump ( s_key );
     gcry_sexp_release( s_key );
 }
 
 /* Entry point of testapi.  The first argument selects the mode:
      (none)            print the result of gcry_check_version (NULL)
      version [REQ]     print the result of gcry_check_version (REQ)
      sexp              run test_sexp
      genkey            run test_genkey
    Any other mode prints a usage line and returns 1; otherwise 0 is
    returned.  */
 int
 main( int argc, char **argv )
 {
   const char *mode = argc < 2 ? NULL : argv[1];
   const char *vers;
 
   /* Without a mode, or with "version", print the version check result.
      Without a mode ARGC is below 2, so NULL is passed as requirement.  */
   if ( !mode || !strcmp ( mode, "version" ) )
     {
       vers = gcry_check_version ( argc > 2 ? argv[2] : NULL );
       printf("%s\n", vers? vers : "(null)");
       return 0;
     }
 
   if ( !strcmp ( mode, "sexp" ) )
     {
       test_sexp ( argc-2, argv+2 );
       return 0;
     }
 
   if ( !strcmp ( mode, "genkey" ) )
     {
       test_genkey ( argc-2, argv+2 );
       return 0;
     }
 
   fprintf (stderr, "usage: testapi mode-string [mode-args]\n");
   return 1;
 }