diff --git a/mpi/ec-nist.c b/mpi/ec-nist.c index 6dfaa1da..3213f9df 100644 --- a/mpi/ec-nist.c +++ b/mpi/ec-nist.c @@ -1,817 +1,817 @@ /* ec-nist.c - NIST optimized elliptic curve functions * Copyright (C) 2021 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #ifndef ASM_DISABLED #include "mpi-internal.h" #include "longlong.h" #include "g10lib.h" #include "context.h" #include "ec-context.h" #include "ec-inline.h" #include "const-time.h" static inline void prefetch(const void *tab, size_t len) { const volatile byte *vtab = tab; if (len > 0 * 64) (void)vtab[0 * 64]; if (len > 1 * 64) (void)vtab[1 * 64]; if (len > 2 * 64) (void)vtab[2 * 64]; if (len > 3 * 64) (void)vtab[3 * 64]; if (len > 4 * 64) (void)vtab[4 * 64]; if (len > 5 * 64) (void)vtab[5 * 64]; if (len > 6 * 64) (void)vtab[6 * 64]; if (len > 7 * 64) (void)vtab[7 * 64]; if (len > 8 * 64) (void)vtab[8 * 64]; if (len > 9 * 64) (void)vtab[9 * 64]; if (len > 10 * 64) (void)vtab[10 * 64]; (void)vtab[len - 1]; } /* Fast reduction routines for NIST curves. 
*/ void _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[3][4] = { { /* P * 1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[192 / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t o[DIM(s)]; const mpi_size_t wsize = DIM(s) - 1; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 192)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.1 Curve P-192". */ s[0] = LOAD64(wp, 3); ADD3_LIMB64 (s[3], s[2], s[1], zero, zero, LOAD64(wp, 3), zero, LOAD64(wp, 4), LOAD64(wp, 4)); ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 5), LOAD64(wp, 5), LOAD64(wp, 5)); ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 2), LOAD64(wp, 1), LOAD64(wp, 0)); /* mod p: * 's[3]' holds carry value (0..2). Subtract (carry + 1) * p. Result will be * with in range -p...p. Handle result being negative with addition and * conditional store. 
*/ carry = LO32_LIMB64(s[3]); SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], p_mult[carry][3], p_mult[carry][2], p_mult[carry][1], p_mult[carry][0]); ADD4_LIMB64 (o[3], o[2], o[1], o[0], s[3], s[2], s[1], s[0], zero, p_mult[0][2], p_mult[0][1], p_mult[0][0]); s_is_negative = LO32_LIMB64(s[3]) >> 31; - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, o[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, o[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, o[2], mask1, s[2]); w->nlimbs = 192 / BITS_PER_MPI_LIMB; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[5][4] = { { /* P * -1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000000U) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU) }, { /* P * 2 */ LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xffffffffU) }, { /* P * 3 */ LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xffffffffU) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(224 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64]; mpi_limb64_t d[DIM(s)]; const mpi_size_t wsize = DIM(s); mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 224)) log_bug ("W must be less than 
m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.2 Curve P-224". */ /* "S1 + S2" with 64-bit limbs: * [0:A10]:[ A9: A8]:[ A7:0]:[0:0] * + [0:0]:[A13:A12]:[A11:0]:[0:0] * => s[3]:s[2]:s[1]:s[0] */ s[0] = zero; ADD3_LIMB64 (s[3], s[2], s[1], LIMB64_HILO(0, LOAD32(wp, 10)), LOAD64(wp, 8 / 2), LIMB64_HILO(LOAD32(wp, 7), 0), zero, LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 11), 0)); /* "T + S1 + S2" */ ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], LIMB64_HILO(0, LOAD32(wp, 6)), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2)); /* "D1 + D2" with 64-bit limbs: * [0:A13]:[A12:A11]:[A10: A9]:[ A8: A7] * + [0:0]:[ 0: 0]:[ 0:A13]:[A12:A11] * => d[3]:d[2]:d[1]:d[0] */ ADD4_LIMB64 (d[3], d[2], d[1], d[0], LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2), LOAD64_UNALIGNED(wp, 9 / 2), LOAD64_UNALIGNED(wp, 7 / 2), zero, zero, LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2)); /* "T + S1 + S2 - D1 - D2" */ SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], d[3], d[2], d[1], d[0]); /* mod p: * Upper 32-bits of 's[3]' holds carry value (-2..2). * Subtract (carry + 1) * p. Result will be with in range -p...p. * Handle result being negative with addition and conditional store. 
*/ carry = HI32_LIMB64(s[3]); SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], p_mult[carry + 2][3], p_mult[carry + 2][2], p_mult[carry + 2][1], p_mult[carry + 2][0]); ADD4_LIMB64 (d[3], d[2], d[1], d[0], s[3], s[2], s[1], s[0], p_mult[0 + 2][3], p_mult[0 + 2][2], p_mult[0 + 2][1], p_mult[0 + 2][0]); s_is_negative = (HI32_LIMB64(s[3]) >> 31); - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]); STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[12][5] = { { /* P * -3 */ LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xfffffffdU) }, { /* P * -2 */ LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xfffffffeU) }, { /* P * -1 */ LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000001U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0x00000001U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), 
LIMB64_C(0xfffffffeU, 0x00000002U), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0x00000002U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffdU, 0x00000003U), LIMB64_C(0x00000000U, 0x00000002U) }, { /* P * 4 */ LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0x00000003U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffcU, 0x00000004U), LIMB64_C(0x00000000U, 0x00000003U) }, { /* P * 5 */ LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0x00000004U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffbU, 0x00000005U), LIMB64_C(0x00000000U, 0x00000004U) }, { /* P * 6 */ LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0x00000005U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffaU, 0x00000006U), LIMB64_C(0x00000000U, 0x00000005U) }, { /* P * 7 */ LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0x00000006U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffff9U, 0x00000007U), LIMB64_C(0x00000000U, 0x00000006U) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(256 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t t[DIM(s)]; mpi_limb64_t d[DIM(s)]; mpi_limb64_t e[DIM(s)]; const mpi_size_t wsize = DIM(s) - 1; mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t mask3; mpi_limb_t s_is_negative; mpi_limb_t d_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 256)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.3 Curve P-256". 
*/ /* "S1 + S2" with 64-bit limbs: * [A15:A14]:[A13:A12]:[A11:0]:[0:0] * + [0:A15]:[A14:A13]:[A12:0]:[0:0] * => s[4]:s[3]:s[2]:s[1]:s[0] */ s[0] = zero; ADD4_LIMB64 (s[4], s[3], s[2], s[1], zero, LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 11), 0), zero, LIMB64_HILO(0, LOAD32(wp, 15)), LOAD64_UNALIGNED(wp, 13 / 2), LIMB64_HILO(LOAD32(wp, 12), 0)); /* "S3 + S4" with 64-bit limbs: * [A15:A14]:[ 0: 0]:[ 0:A10]:[ A9:A8] * + [A8:A13]:[A15:A14]:[A13:A11]:[A10:A9] * => t[4]:t[3]:t[2]:t[1]:t[0] */ ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 14 / 2), zero, LIMB64_HILO(0, LOAD32(wp, 10)), LOAD64(wp, 8 / 2), zero, LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 13)), LOAD64(wp, 14 / 2), LIMB64_HILO(LOAD32(wp, 13), LOAD32(wp, 11)), LOAD64_UNALIGNED(wp, 9 / 2)); /* "2*S1 + 2*S2" */ ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0]); /* "T + S3 + S4" */ ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0], t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 6 / 2), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2)); /* "2*S1 + 2*S2 - D3" with 64-bit limbs: * s[4]: s[3]: s[2]: s[1]: s[0] * - [A12:0]:[A10:A9]:[A8:A15]:[A14:A13] * => s[4]:s[3]:s[2]:s[1]:s[0] */ SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], zero, LIMB64_HILO(LOAD32(wp, 12), 0), LOAD64_UNALIGNED(wp, 9 / 2), LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 15)), LOAD64_UNALIGNED(wp, 13 / 2)); /* "T + 2*S1 + 2*S2 + S3 + S4 - D3" */ ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], t[4], t[3], t[2], t[1], t[0]); /* "D1 + D2" with 64-bit limbs: * [0:A13]:[A12:A11] + [A15:A14]:[A13:A12] => d[2]:d[1]:d[0] * [A10:A8] + [A11:A9] => d[4]:d[3] */ ADD3_LIMB64 (d[2], d[1], d[0], zero, LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2), zero, LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2)); ADD2_LIMB64 (d[4], d[3], zero, LIMB64_HILO(LOAD32(wp, 10), LOAD32(wp, 8)), zero, LIMB64_HILO(LOAD32(wp, 11), LOAD32(wp, 9))); /* "D1 + 
D2 + D4" with 64-bit limbs: * d[4]: d[3]: d[2]: d[1]: d[0] * - [A13:0]:[A11:A10]:[A9:0]:[A15:A14] * => d[4]:d[3]:d[2]:d[1]:d[0] */ ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0], d[4], d[3], d[2], d[1], d[0], zero, LIMB64_HILO(LOAD32(wp, 13), 0), LOAD64(wp, 10 / 2), LIMB64_HILO(LOAD32(wp, 9), 0), LOAD64(wp, 14 / 2)); /* "T + 2*S1 + 2*S2 + S3 + S4 - D1 - D2 - D3 - D4" */ SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], d[4], d[3], d[2], d[1], d[0]); /* mod p: * 's[4]' holds carry value (-4..6). Subtract (carry + 1) * p. Result * will be with in range -2*p...p. Handle result being negative with * addition and conditional store. */ carry = LO32_LIMB64(s[4]); SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], p_mult[carry + 4][4], p_mult[carry + 4][3], p_mult[carry + 4][2], p_mult[carry + 4][1], p_mult[carry + 4][0]); /* Add 1*P */ ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0], s[4], s[3], s[2], s[1], s[0], zero, p_mult[0 + 4][3], p_mult[0 + 4][2], p_mult[0 + 4][1], p_mult[0 + 4][0]); /* Add 2*P */ ADD5_LIMB64 (e[4], e[3], e[2], e[1], e[0], s[4], s[3], s[2], s[1], s[0], zero, p_mult[1 + 4][3], p_mult[1 + 4][2], p_mult[1 + 4][1], p_mult[1 + 4][0]); s_is_negative = LO32_LIMB64(s[4]) >> 31; d_is_negative = LO32_LIMB64(d[4]) >> 31; - mask3 = _gcry_ct_vzero - d_is_negative; - mask2 = (_gcry_ct_vzero - s_is_negative) & ~mask3; - mask1 = (s_is_negative - _gcry_ct_vone) & ~mask3; + mask3 = ct_limb_gen_mask(d_is_negative); + mask2 = ct_limb_gen_mask(s_is_negative) & ~mask3; + mask1 = ct_limb_gen_inv_mask(s_is_negative) & ~mask3; s[0] = LIMB_OR64(MASK_AND64(mask2, d[0]), MASK_AND64(mask1, s[0])); s[1] = LIMB_OR64(MASK_AND64(mask2, d[1]), MASK_AND64(mask1, s[1])); s[2] = LIMB_OR64(MASK_AND64(mask2, d[2]), MASK_AND64(mask1, s[2])); s[3] = LIMB_OR64(MASK_AND64(mask2, d[3]), MASK_AND64(mask1, s[3])); s[0] = LIMB_OR64(MASK_AND64(mask3, e[0]), s[0]); s[1] = LIMB_OR64(MASK_AND64(mask3, e[1]), s[1]); s[2] = LIMB_OR64(MASK_AND64(mask3, e[2]), s[2]); 
s[3] = LIMB_OR64(MASK_AND64(mask3, e[3]), s[3]); STORE64(wp, 0, s[0]); STORE64(wp, 1, s[1]); STORE64(wp, 2, s[2]); STORE64(wp, 3, s[3]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[11][7] = { { /* P * -2 */ LIMB64_C(0xfffffffeU, 0x00000002U), LIMB64_C(0x00000001U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffeU) }, { /* P * -1 */ LIMB64_C(0xffffffffU, 0x00000001U), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0x00000001U, 0xfffffffeU), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0x00000002U, 0xfffffffdU), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U) }, { /* P * 4 */ LIMB64_C(0x00000003U, 0xfffffffcU), LIMB64_C(0xfffffffcU, 
0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000003U) }, { /* P * 5 */ LIMB64_C(0x00000004U, 0xfffffffbU), LIMB64_C(0xfffffffbU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000004U) }, { /* P * 6 */ LIMB64_C(0x00000005U, 0xfffffffaU), LIMB64_C(0xfffffffaU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000005U) }, { /* P * 7 */ LIMB64_C(0x00000006U, 0xfffffff9U), LIMB64_C(0xfffffff9U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff8U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000006U) }, { /* P * 8 */ LIMB64_C(0x00000007U, 0xfffffff8U), LIMB64_C(0xfffffff8U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff7U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000007U) }, }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(384 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t t[DIM(s)]; mpi_limb64_t d[DIM(s)]; mpi_limb64_t x[DIM(s)]; #if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN) mpi_limb_t wp_shr32[(DIM(s) - 1) * LIMBS_PER_LIMB64]; #endif const mpi_size_t wsize = DIM(s) - 1; mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 384)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); 
ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.4 Curve P-384". */ #if BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB # ifdef WORDS_BIGENDIAN # define LOAD64_SHR32(idx) LOAD64(wp_shr32, ((idx) / 2 - wsize)) _gcry_mpih_rshift (wp_shr32, wp + 384 / BITS_PER_MPI_LIMB, wsize * LIMBS_PER_LIMB64, 32); # else # define LOAD64_SHR32(idx) LOAD64_UNALIGNED(wp, idx / 2) #endif #else # define LOAD64_SHR32(idx) LIMB64_HILO(LOAD32(wp, (idx) + 1), LOAD32(wp, idx)) #endif /* "S1 + S1" with 64-bit limbs: * [0:A23]:[A22:A21] * + [0:A23]:[A22:A21] * => s[3]:s[2] */ ADD2_LIMB64 (s[3], s[2], LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21), LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21)); /* "S5 + S6" with 64-bit limbs: * [A23:A22]:[A21:A20]:[ 0:0]:[0: 0] * + [ 0: 0]:[A23:A22]:[A21:0]:[0:A20] * => x[4]:x[3]:x[2]:x[1]:x[0] */ x[0] = LIMB64_HILO(0, LOAD32(wp, 20)); x[1] = LIMB64_HILO(LOAD32(wp, 21), 0); ADD3_LIMB64 (x[4], x[3], x[2], zero, LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2), zero, zero, LOAD64(wp, 22 / 2)); /* "D2 + D3" with 64-bit limbs: * [0:A23]:[A22:A21]:[A20:0] * + [0:A23]:[A23:0]:[0:0] * => d[2]:d[1]:d[0] */ d[0] = LIMB64_HILO(LOAD32(wp, 20), 0); ADD2_LIMB64 (d[2], d[1], LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21), LIMB64_HILO(0, LOAD32(wp, 23)), LIMB64_HILO(LOAD32(wp, 23), 0)); /* "2*S1 + S5 + S6" with 64-bit limbs: * s[4]:s[3]:s[2]:s[1]:s[0] * + x[4]:x[3]:x[2]:x[1]:x[0] * => s[4]:s[3]:s[2]:s[1]:s[0] */ s[0] = x[0]; s[1] = x[1]; ADD3_LIMB64(s[4], s[3], s[2], zero, s[3], s[2], x[4], x[3], x[2]); /* "T + S2" with 64-bit limbs: * [A11:A10]:[ A9: A8]:[ A7: A6]:[ A5: A4]:[ A3: A2]:[ A1: A0] * + [A23:A22]:[A21:A20]:[A19:A18]:[A17:A16]:[A15:A14]:[A13:A12] * => t[6]:t[5]:t[4]:t[3]:t[2]:t[1]:t[0] */ ADD7_LIMB64 (t[6], t[5], t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 10 / 2), LOAD64(wp, 8 / 2), LOAD64(wp, 6 / 2), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2), zero, LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2), LOAD64(wp, 18 / 2), 
LOAD64(wp, 16 / 2), LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2)); /* "2*S1 + S4 + S5 + S6" with 64-bit limbs: * s[6]: s[5]: s[4]: s[3]: s[2]: s[1]: s[0] * + [A19:A18]:[A17:A16]:[A15:A14]:[A13:A12]:[A20:0]:[A23:0] * => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0] */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, zero, s[4], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 18 / 2), LOAD64(wp, 16 / 2), LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 20), 0), LIMB64_HILO(LOAD32(wp, 23), 0)); /* "D1 + D2 + D3" with 64-bit limbs: * d[6]: d[5]: d[4]: d[3]: d[2]: d[1]: d[0] * + [A22:A21]:[A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23] * => d[6]:d[5]:d[4]:d[3]:d[2]:d[1]:d[0] */ ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0], zero, zero, zero, zero, d[2], d[1], d[0], zero, LOAD64_SHR32(21), LOAD64_SHR32(19), LOAD64_SHR32(17), LOAD64_SHR32(15), LOAD64_SHR32(13), LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23))); /* "2*S1 + S3 + S4 + S5 + S6" with 64-bit limbs: * s[6]: s[5]: s[4]: s[3]: s[2]: s[1]: s[0] * + [A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23]:[A22:A21] * => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0] */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, LOAD64_SHR32(19), LOAD64_SHR32(17), LOAD64_SHR32(15), LOAD64_SHR32(13), LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23)), LOAD64_SHR32(21)); /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6" */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], t[6], t[5], t[4], t[3], t[2], t[1], t[0]); /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6 - D1 - D2 - D3" */ SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], d[6], d[5], d[4], d[3], d[2], d[1], d[0]); #undef LOAD64_SHR32 /* mod p: * 's[6]' holds carry value (-3..7). Subtract (carry + 1) * p. Result * will be with in range -p...p. Handle result being negative with * addition and conditional store. 
*/ carry = LO32_LIMB64(s[6]); SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], p_mult[carry + 3][6], p_mult[carry + 3][5], p_mult[carry + 3][4], p_mult[carry + 3][3], p_mult[carry + 3][2], p_mult[carry + 3][1], p_mult[carry + 3][0]); ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, p_mult[0 + 3][5], p_mult[0 + 3][4], p_mult[0 + 3][3], p_mult[0 + 3][2], p_mult[0 + 3][1], p_mult[0 + 3][0]); s_is_negative = LO32_LIMB64(s[6]) >> 31; - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]); STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]); STORE64_COND(wp, 4, mask2, d[4], mask1, s[4]); STORE64_COND(wp, 5, mask2, d[5], mask1, s[5]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); #if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN) wipememory(wp_shr32, sizeof(wp_shr32)); #endif } void _gcry_mpi_ec_nist521_mod (gcry_mpi_t w, mpi_ec_t ctx) { mpi_limb_t s[(521 + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB]; const mpi_size_t wsize = DIM(s); mpi_limb_t cy; mpi_ptr_t wp; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 521)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2); wp = w->d; /* See "FIPS 186-4, D.2.5 Curve P-521". 
*/ _gcry_mpih_rshift (s, wp + wsize - 1, wsize, 521 % BITS_PER_MPI_LIMB); s[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1; wp[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1; _gcry_mpih_add_n (wp, wp, s, wsize); /* "mod p" */ cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize); _gcry_mpih_add_n (s, wp, ctx->p->d, wsize); mpih_set_cond (wp, s, wsize, mpih_limb_is_not_zero (cy)); w->nlimbs = wsize; MPN_NORMALIZE (wp, w->nlimbs); } #endif /* !ASM_DISABLED */ diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h index 70045037..935bf3e1 100644 --- a/mpi/mpi-internal.h +++ b/mpi/mpi-internal.h @@ -1,323 +1,327 @@ /* mpi-internal.h - Internal to the Multi Precision Integers * Copyright (C) 1994, 1996, 1998, 2000, 2002, * 2003 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * SPDX-License-Identifier: LGPL-2.1-or-later * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. 
*/ #ifndef G10_MPI_INTERNAL_H #define G10_MPI_INTERNAL_H #include "mpi-asm-defs.h" #ifndef BITS_PER_MPI_LIMB #if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_INT typedef unsigned int mpi_limb_t; typedef signed int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG typedef unsigned long int mpi_limb_t; typedef signed long int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG typedef unsigned long long int mpi_limb_t; typedef signed long long int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_SHORT typedef unsigned short int mpi_limb_t; typedef signed short int mpi_limb_signed_t; #else #error BYTES_PER_MPI_LIMB does not match any C type #endif #define BITS_PER_MPI_LIMB (8*BYTES_PER_MPI_LIMB) #endif /*BITS_PER_MPI_LIMB*/ #include "mpi.h" +#include "const-time.h" /* If KARATSUBA_THRESHOLD is not already defined, define it to a * value which is good on most machines. */ /* tested 4, 16, 32 and 64, where 16 gave the best performance when * checking a 768 and a 1024 bit ElGamal signature. * (wk 22.12.97) */ #ifndef KARATSUBA_THRESHOLD #define KARATSUBA_THRESHOLD 16 #endif /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ #if KARATSUBA_THRESHOLD < 2 #undef KARATSUBA_THRESHOLD #define KARATSUBA_THRESHOLD 2 #endif typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ #define ABS(x) (x >= 0 ? x : -x) #define MIN(l,o) ((l) < (o) ? (l) : (o)) #define MAX(h,i) ((h) > (i) ? (h) : (i)) #define RESIZE_IF_NEEDED(a,b) \ do { \ if( (a)->alloced < (b) ) \ mpi_resize((a), (b)); \ } while(0) #define RESIZE_AND_CLEAR_IF_NEEDED(a,b) \ do { \ if( (a)->nlimbs < (b) ) \ mpi_resize((a), (b)); \ } while(0) /* Copy N limbs from S to D. 
*/ #define MPN_COPY( d, s, n) \ do { \ mpi_size_t _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = (s)[_i]; \ } while(0) #define MPN_COPY_INCR( d, s, n) \ do { \ mpi_size_t _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = (s)[_i]; \ } while (0) #define MPN_COPY_DECR( d, s, n ) \ do { \ mpi_size_t _i; \ for( _i = (n)-1; _i >= 0; _i--) \ (d)[_i] = (s)[_i]; \ } while(0) /* Zero N limbs at D */ #define MPN_ZERO(d, n) \ do { \ int _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = 0; \ } while (0) #define MPN_NORMALIZE(d, n) \ do { \ while( (n) > 0 ) { \ if( (d)[(n)-1] ) \ break; \ (n)--; \ } \ } while(0) #define MPN_NORMALIZE_NOT_ZERO(d, n) \ do { \ for(;;) { \ if( (d)[(n)-1] ) \ break; \ (n)--; \ } \ } while(0) #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ do { \ if( (size) < KARATSUBA_THRESHOLD ) \ mul_n_basecase (prodp, up, vp, size); \ else \ mul_n (prodp, up, vp, size, tspace); \ } while (0) /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). * If this would yield overflow, DI should be the largest possible number * (i.e., only ones). For correct operation, the most significant bit of D * has to be set. Put the quotient in Q and the remainder in R. 
*/ #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ do { \ mpi_limb_t _ql GCC_ATTR_UNUSED; \ mpi_limb_t _q, _r; \ mpi_limb_t _xh, _xl; \ umul_ppmm (_q, _ql, (nh), (di)); \ _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ umul_ppmm (_xh, _xl, _q, (d)); \ sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ if( _xh ) { \ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ _q++; \ if( _xh) { \ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ _q++; \ } \ } \ if( _r >= (d) ) { \ _r -= (d); \ _q++; \ } \ (r) = _r; \ (q) = _q; \ } while (0) /*-- mpiutil.c --*/ #define mpi_alloc_limb_space(n,f) _gcry_mpi_alloc_limb_space((n),(f)) mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned nlimbs, int sec ); void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs ); void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned nlimbs ); /*-- mpi-bit.c --*/ #define mpi_rshift_limbs(a,n) _gcry_mpi_rshift_limbs ((a), (n)) #define mpi_lshift_limbs(a,n) _gcry_mpi_lshift_limbs ((a), (n)) void _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ); void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ); /*-- mpih-add.c --*/ mpi_limb_t _gcry_mpih_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb ); mpi_limb_t _gcry_mpih_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); mpi_limb_t _gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpih-sub.c --*/ mpi_limb_t _gcry_mpih_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb ); mpi_limb_t _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); mpi_limb_t _gcry_mpih_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpih-cmp.c --*/ int _gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size ); /*-- mpih-mul.c --*/ struct karatsuba_ctx { struct karatsuba_ctx *next; mpi_ptr_t tspace; 
unsigned int tspace_nlimbs; mpi_size_t tspace_size; mpi_ptr_t tp; unsigned int tp_nlimbs; mpi_size_t tp_size; }; void _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ); mpi_limb_t _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); void _gcry_mpih_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); mpi_limb_t _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize); void _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ); void _gcry_mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace); void _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize, struct karatsuba_ctx *ctx ); /*-- mpih-mul_1.c (or xxx/cpu/ *.S) --*/ mpi_limb_t _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); /*-- mpih-div.c --*/ mpi_limb_t _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, mpi_limb_t divisor_limb); mpi_limb_t _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize); mpi_limb_t _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr, mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, mpi_limb_t divisor_limb); /*-- mpih-shift.c --*/ mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt); mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt); /*-- mpih-const-time.c --*/ #define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o)) #define mpih_add_n_cond(w,u,v,s,o) _gcry_mpih_add_n_cond ((w),(u),(v),(s),(o)) #define mpih_sub_n_cond(w,u,v,s,o) _gcry_mpih_sub_n_cond ((w),(u),(v),(s),(o)) #define mpih_swap_cond(u,v,s,o) _gcry_mpih_swap_cond ((u),(v),(s),(o)) #define 
mpih_abs_cond(w,u,s,o) _gcry_mpih_abs_cond ((w),(u),(s),(o)) #define mpih_mod(v,vs,u,us) _gcry_mpih_mod ((v),(vs),(u),(us)) +DEFINE_CT_TYPE_GEN_MASK(limb, mpi_limb_t) +DEFINE_CT_TYPE_GEN_INV_MASK(limb, mpi_limb_t) + static inline int mpih_limb_is_zero (mpi_limb_t a) { /* Sign bit set if A == 0. */ a = ~a & ~(-a); return a >> (BITS_PER_MPI_LIMB - 1); } static inline int mpih_limb_is_not_zero (mpi_limb_t a) { /* Sign bit set if A != 0. */ a = a | (-a); return a >> (BITS_PER_MPI_LIMB - 1); } void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable); mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable); mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, mpi_ptr_t up, mpi_size_t usize); int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v); /* Define stuff for longlong.h. 
*/ #define W_TYPE_SIZE BITS_PER_MPI_LIMB typedef mpi_limb_t UWtype; typedef unsigned int UHWtype; #if defined (__GNUC__) typedef unsigned int UQItype __attribute__ ((mode (QI))); typedef int SItype __attribute__ ((mode (SI))); typedef unsigned int USItype __attribute__ ((mode (SI))); typedef int DItype __attribute__ ((mode (DI))); typedef unsigned int UDItype __attribute__ ((mode (DI))); #else typedef unsigned char UQItype; typedef long SItype; typedef unsigned long USItype; #endif #ifdef __GNUC__ #include "mpi-inline.h" #endif #endif /*G10_MPI_INTERNAL_H*/ diff --git a/mpi/mpih-const-time.c b/mpi/mpih-const-time.c index 3d854e8c..4f563cb8 100644 --- a/mpi/mpih-const-time.c +++ b/mpi/mpih-const-time.c @@ -1,217 +1,217 @@ /* mpih-const-time.c - Constant-time MPI helper functions * Copyright (C) 2020 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include "mpi-internal.h" #include "g10lib.h" #include "const-time.h" #define A_LIMB_1 ((mpi_limb_t)1) /* * W = U when OP_ENABLED=1 * otherwise, W keeps old value */ void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; for (i = 0; i < usize; i++) { wp[i] = (wp[i] & mask2) | (up[i] & mask1); } } /* * W = U + V when OP_ENABLED=1 * otherwise, W = U */ mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; mpi_limb_t cy; cy = 0; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = u + vp[i]; mpi_limb_t cy1 = x < u; mpi_limb_t cy2; x = x + cy; cy2 = x < cy; cy = cy1 | cy2; wp[i] = (u & mask2) | (x & mask1); } return cy & mask1; } /* * W = U - V when OP_ENABLED=1 * otherwise, W = U */ mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; mpi_limb_t cy; cy = 0; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = u - vp[i]; mpi_limb_t cy1 = x > u; mpi_limb_t cy2; cy2 = x < cy; x = x - cy; cy = cy1 | cy2; wp[i] = (u 
& mask2) | (x & mask1); } return cy & mask1; } /* * Swap value of U and V when OP_ENABLED=1 * otherwise, no change */ void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t v = vp[i]; up[i] = (u & mask2) | (v & mask1); vp[i] = (u & mask1) | (v & mask2); } } /* * W = -U when OP_ENABLED=1 * otherwise, W = U */ void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_limb_t cy = op_enable; mpi_size_t i; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = ~u + cy; cy = (x < ~u); wp[i] = (u & mask2) | (x & mask1); } } /* * Allocating memory for W, * compute W = V % U, then return W */ mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, mpi_ptr_t up, mpi_size_t usize) { int secure; mpi_ptr_t rp; mpi_size_t i; secure = _gcry_is_secure (vp); rp = mpi_alloc_limb_space (usize, secure); MPN_ZERO (rp, usize); for (i = 0; i < vsize * BITS_PER_MPI_LIMB; i++) { unsigned int j = vsize * BITS_PER_MPI_LIMB - 1 - i; unsigned int limbno = j / BITS_PER_MPI_LIMB; unsigned int bitno = j % BITS_PER_MPI_LIMB; mpi_limb_t limb = vp[limbno]; unsigned int the_bit = ((limb & (A_LIMB_1 << bitno)) ? 
1 : 0); mpi_limb_t underflow; mpi_limb_t overflow; overflow = _gcry_mpih_lshift (rp, rp, usize, 1); rp[0] |= the_bit; underflow = _gcry_mpih_sub_n (rp, rp, up, usize); mpih_add_n_cond (rp, rp, up, usize, overflow ^ underflow); } return rp; } int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v) { int is_all_zero = 1; mpi_size_t i; for (i = 1; i < usize; i++) is_all_zero &= mpih_limb_is_zero (up[i]); if (is_all_zero) { if (up[0] < v) return -1; else if (up[0] > v) return 1; else return 0; } return 1; } diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c index f7506718..07cef257 100644 --- a/mpi/mpiutil.c +++ b/mpi/mpiutil.c @@ -1,792 +1,792 @@ /* mpiutil.ac - Utility functions for MPI * Copyright (C) 1998, 2000, 2001, 2002, 2003, * 2007 Free Software Foundation, Inc. * Copyright (C) 2013 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include #include "g10lib.h" #include "mpi-internal.h" #include "mod-source-info.h" #include "const-time.h" #if SIZEOF_UNSIGNED_INT == 2 # define MY_UINT_MAX 0xffff /* (visual check: 0123 ) */ #elif SIZEOF_UNSIGNED_INT == 4 # define MY_UINT_MAX 0xffffffff /* (visual check: 01234567 ) */ #elif SIZEOF_UNSIGNED_INT == 8 # define MY_UINT_MAX 0xffffffffffffffff /* (visual check: 0123456789abcdef ) */ #else # error Need MY_UINT_MAX for this limb size #endif /* Constants allocated right away at startup. */ static gcry_mpi_t constants[MPI_NUMBER_OF_CONSTANTS]; const char * _gcry_mpi_get_hw_config (void) { return mod_source_info + 1; } /* Initialize the MPI subsystem. This is called early and allows to do some initialization without taking care of threading issues. */ gcry_err_code_t _gcry_mpi_init (void) { int idx; unsigned long value; for (idx=0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) { switch (idx) { case MPI_C_ZERO: value = 0; break; case MPI_C_ONE: value = 1; break; case MPI_C_TWO: value = 2; break; case MPI_C_THREE: value = 3; break; case MPI_C_FOUR: value = 4; break; case MPI_C_EIGHT: value = 8; break; default: log_bug ("invalid mpi_const selector %d\n", idx); } constants[idx] = mpi_alloc_set_ui (value); constants[idx]->flags = (16|32); } return 0; } /**************** * Note: It was a bad idea to use the number of limbs to allocate * because on a alpha the limbs are large but we normally need * integers of n bits - So we should change this to bits (or bytes). * * But mpi_alloc is used in a lot of places :-(. New code * should use mpi_new. */ gcry_mpi_t _gcry_mpi_alloc( unsigned nlimbs ) { gcry_mpi_t a; a = xmalloc( sizeof *a ); a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 0 ) : NULL; a->alloced = nlimbs; a->nlimbs = 0; a->sign = 0; a->flags = 0; return a; } gcry_mpi_t _gcry_mpi_alloc_secure( unsigned nlimbs ) { gcry_mpi_t a; a = xmalloc( sizeof *a ); a->d = nlimbs? 
mpi_alloc_limb_space( nlimbs, 1 ) : NULL; a->alloced = nlimbs; a->flags = 1; a->nlimbs = 0; a->sign = 0; return a; } mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned int nlimbs, int secure ) { mpi_ptr_t p; size_t len; len = (nlimbs ? nlimbs : 1) * sizeof (mpi_limb_t); p = secure ? xmalloc_secure (len) : xmalloc (len); if (! nlimbs) *p = 0; return p; } void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs) { if (a) { size_t len = nlimbs * sizeof(mpi_limb_t); /* If we have information on the number of allocated limbs, we better wipe that space out. This is a failsafe feature if secure memory has been disabled or was not properly implemented in user provided allocation functions. */ if (len) wipememory (a, len); xfree(a); } } void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned int nlimbs ) { _gcry_mpi_free_limb_space (a->d, a->alloced); a->d = ap; a->alloced = nlimbs; } /**************** * Resize the array of A to NLIMBS. The additional space is cleared * (set to 0). */ void _gcry_mpi_resize (gcry_mpi_t a, unsigned nlimbs) { size_t i; if (nlimbs <= a->alloced) { /* We only need to clear the new space (this is a nop if the limb space is already of the correct size. */ for (i=a->nlimbs; i < a->alloced; i++) a->d[i] = 0; return; } /* Actually resize the limb space. */ if (a->d) { a->d = xrealloc (a->d, nlimbs * sizeof (mpi_limb_t)); for (i=a->nlimbs; i < nlimbs; i++) a->d[i] = 0; } else { if (a->flags & 1) /* Secure memory is wanted. */ a->d = xcalloc_secure (nlimbs , sizeof (mpi_limb_t)); else /* Standard memory. */ a->d = xcalloc (nlimbs , sizeof (mpi_limb_t)); } a->alloced = nlimbs; } void _gcry_mpi_clear( gcry_mpi_t a ) { if (mpi_is_immutable (a)) { mpi_immutable_failed (); return; } a->nlimbs = 0; a->flags = 0; } void _gcry_mpi_free( gcry_mpi_t a ) { if (!a ) return; if ((a->flags & 32)) { #if GPGRT_VERSION_NUMBER >= 0x011600 /* 1.22 */ gpgrt_annotate_leaked_object(a); #endif return; /* Never release a constant. 
*/ } if ((a->flags & 4)) xfree( a->d ); else { _gcry_mpi_free_limb_space(a->d, a->alloced); } /* Check that the flags makes sense. We better allow for bit 1 (value 2) for backward ABI compatibility. */ if ((a->flags & ~(1|2|4|16 |GCRYMPI_FLAG_USER1 |GCRYMPI_FLAG_USER2 |GCRYMPI_FLAG_USER3 |GCRYMPI_FLAG_USER4))) log_bug("invalid flag value in mpi_free\n"); xfree (a); } void _gcry_mpi_immutable_failed (void) { log_info ("Warning: trying to change an immutable MPI\n"); } static void mpi_set_secure( gcry_mpi_t a ) { mpi_ptr_t ap, bp; if ( (a->flags & 1) ) return; a->flags |= 1; ap = a->d; if (!a->nlimbs) { gcry_assert (!ap); return; } bp = mpi_alloc_limb_space (a->alloced, 1); MPN_COPY( bp, ap, a->nlimbs ); a->d = bp; _gcry_mpi_free_limb_space (ap, a->alloced); } gcry_mpi_t _gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits) { if (!a) a = mpi_alloc(0); if (mpi_is_immutable (a)) { mpi_immutable_failed (); return a; } if( a->flags & 4 ) xfree (a->d); else _gcry_mpi_free_limb_space (a->d, a->alloced); a->d = p; a->alloced = 0; a->nlimbs = 0; a->sign = nbits; a->flags = 4 | (a->flags & (GCRYMPI_FLAG_USER1|GCRYMPI_FLAG_USER2 |GCRYMPI_FLAG_USER3|GCRYMPI_FLAG_USER4)); if (_gcry_is_secure (a->d)) a->flags |= 1; return a; } gcry_mpi_t _gcry_mpi_set_opaque_copy (gcry_mpi_t a, const void *p, unsigned int nbits) { void *d; unsigned int n; n = (nbits+7)/8; d = _gcry_is_secure (p)? xtrymalloc_secure (n) : xtrymalloc (n); if (!d) return NULL; memcpy (d, p, n); return mpi_set_opaque (a, d, nbits); } void * _gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits) { if( !(a->flags & 4) ) log_bug("mpi_get_opaque on normal mpi\n"); if( nbits ) *nbits = a->sign; return a->d; } void * _gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits) { const void *s; void *d; unsigned int n; s = mpi_get_opaque (a, nbits); if (!s && nbits) return NULL; n = (*nbits+7)/8; d = _gcry_is_secure (s)? 
xtrymalloc_secure (n) : xtrymalloc (n); if (d) memcpy (d, s, n); return d; } /**************** * Note: This copy function should not interpret the MPI * but copy it transparently. */ gcry_mpi_t _gcry_mpi_copy (gcry_mpi_t a) { int i; gcry_mpi_t b; if( a && (a->flags & 4) ) { void *p = NULL; if (a->sign) { p = _gcry_is_secure(a->d)? xmalloc_secure ((a->sign+7)/8) : xmalloc ((a->sign+7)/8); if (a->d) memcpy( p, a->d, (a->sign+7)/8 ); } b = mpi_set_opaque( NULL, p, a->sign ); b->flags = a->flags; b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ } else if( a ) { b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs ) : mpi_alloc( a->nlimbs ); b->nlimbs = a->nlimbs; b->sign = a->sign; b->flags = a->flags; b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ for(i=0; i < b->nlimbs; i++ ) b->d[i] = a->d[i]; } else b = NULL; return b; } /* Return true if A is negative. */ int _gcry_mpi_is_neg (gcry_mpi_t a) { if (a->sign && _gcry_mpi_cmp_ui (a, 0)) return 1; else return 0; } /* W = - U */ void _gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u) { if (w != u) mpi_set (w, u); else if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } w->sign = !u->sign; } /* W = [W] */ void _gcry_mpi_abs (gcry_mpi_t w) { if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } w->sign = 0; } /**************** * This function allocates an MPI which is optimized to hold * a value as large as the one given in the argument and allocates it * with the same flags as A. */ gcry_mpi_t _gcry_mpi_alloc_like( gcry_mpi_t a ) { gcry_mpi_t b; if( a && (a->flags & 4) ) { int n = (a->sign+7)/8; void *p = _gcry_is_secure(a->d)? xtrymalloc_secure (n) : xtrymalloc (n); memcpy( p, a->d, n ); b = mpi_set_opaque( NULL, p, a->sign ); } else if( a ) { b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs ) : mpi_alloc( a->nlimbs ); b->nlimbs = 0; b->sign = 0; b->flags = a->flags; } else b = NULL; return b; } /* Set U into W and release U. If W is NULL only U will be released. 
*/ void _gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u) { if (w) { if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } _gcry_mpi_assign_limb_space (w, u->d, u->alloced); w->nlimbs = u->nlimbs; w->sign = u->sign; w->flags = u->flags; u->alloced = 0; u->nlimbs = 0; u->d = NULL; } _gcry_mpi_free (u); } gcry_mpi_t _gcry_mpi_set (gcry_mpi_t w, gcry_mpi_t u) { mpi_ptr_t wp, up; mpi_size_t usize = u->nlimbs; int usign = u->sign; if (!w) w = _gcry_mpi_alloc( mpi_get_nlimbs(u) ); if (mpi_is_immutable (w)) { mpi_immutable_failed (); return w; } RESIZE_IF_NEEDED(w, usize); wp = w->d; up = u->d; MPN_COPY( wp, up, usize ); w->nlimbs = usize; w->flags = u->flags; w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ w->sign = usign; return w; } /**************** * Set the value of W by the one of U, when SET is 1. * Leave the value when SET is 0. * This implementation should be constant-time regardless of SET. */ gcry_mpi_t _gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u, unsigned long set) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - set; - mpi_limb_t mask2 = set - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(set); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(set); mpi_size_t i; mpi_size_t nlimbs = u->alloced; mpi_limb_t xu; mpi_limb_t xw; mpi_limb_t *uu = u->d; mpi_limb_t *uw = w->d; if (w->alloced != u->alloced) log_bug ("mpi_set_cond: different sizes\n"); for (i = 0; i < nlimbs; i++) { xu = uu[i]; xw = uw[i]; uw[i] = (xw & mask2) | (xu & mask1); } xu = u->nlimbs; xw = w->nlimbs; w->nlimbs = (xw & mask2) | (xu & mask1); xu = u->sign; xw = w->sign; w->sign = (xw & mask2) | (xu & mask1); return w; } gcry_mpi_t _gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u) { if (!w) w = _gcry_mpi_alloc (1); /* FIXME: If U is 0 we have no need to resize and thus possible allocating the the limbs. 
*/ if (mpi_is_immutable (w)) { mpi_immutable_failed (); return w; } RESIZE_IF_NEEDED(w, 1); w->d[0] = u; w->nlimbs = u? 1:0; w->sign = 0; w->flags = 0; return w; } /* If U is non-negative and small enough store it as an unsigned int * at W. If the value does not fit into an unsigned int or is * negative return GPG_ERR_ERANGE. Note that we return an unsigned * int so that the value can be used with the bit test functions; in * contrast the other _ui functions take an unsigned long so that on * some platforms they may accept a larger value. On error the value * at W is not changed. */ gcry_err_code_t _gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u) { mpi_limb_t x; if (u->nlimbs > 1 || u->sign) return GPG_ERR_ERANGE; x = (u->nlimbs == 1) ? u->d[0] : 0; if (sizeof (x) > sizeof (unsigned int) && x > MY_UINT_MAX) return GPG_ERR_ERANGE; *w = x; return 0; } gcry_mpi_t _gcry_mpi_alloc_set_ui( unsigned long u) { gcry_mpi_t w = mpi_alloc(1); w->d[0] = u; w->nlimbs = u? 1:0; w->sign = 0; return w; } void _gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b) { struct gcry_mpi tmp; tmp = *a; *a = *b; *b = tmp; } /**************** * Swap the value of A and B, when SWAP is 1. * Leave the value when SWAP is 0. * This implementation should be constant-time regardless of SWAP. 
*/ void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - swap; - mpi_limb_t mask2 = swap - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(swap); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(swap); mpi_size_t i; mpi_size_t nlimbs; mpi_limb_t *ua = a->d; mpi_limb_t *ub = b->d; mpi_limb_t xa; mpi_limb_t xb; if (a->alloced > b->alloced) nlimbs = b->alloced; else nlimbs = a->alloced; if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) log_bug ("mpi_swap_cond: different sizes\n"); for (i = 0; i < nlimbs; i++) { xa = ua[i]; xb = ub[i]; ua[i] = (xa & mask2) | (xb & mask1); ub[i] = (xa & mask1) | (xb & mask2); } xa = a->nlimbs; xb = b->nlimbs; a->nlimbs = (xa & mask2) | (xb & mask1); b->nlimbs = (xa & mask1) | (xb & mask2); xa = a->sign; xb = b->sign; a->sign = (xa & mask2) | (xb & mask1); b->sign = (xa & mask1) | (xb & mask2); } /**************** * Set bit N of A, when SET is 1. * This implementation should be constant-time regardless of SET. */ void _gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set) { unsigned int limbno, bitno; mpi_limb_t set_the_bit = !!set; limbno = n / BITS_PER_MPI_LIMB; bitno = n % BITS_PER_MPI_LIMB; a->d[limbno] |= (set_the_bit<flags |= (16|32); break; case GCRYMPI_FLAG_IMMUTABLE: a->flags |= 16; break; case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: a->flags |= flag; break; case GCRYMPI_FLAG_OPAQUE: default: log_bug("invalid flag value\n"); } } void _gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) { (void)a; /* Not yet used. 
*/ switch (flag) { case GCRYMPI_FLAG_IMMUTABLE: if (!(a->flags & 32)) a->flags &= ~16; break; case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: a->flags &= ~flag; break; case GCRYMPI_FLAG_CONST: case GCRYMPI_FLAG_SECURE: case GCRYMPI_FLAG_OPAQUE: default: log_bug("invalid flag value\n"); } } int _gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) { switch (flag) { case GCRYMPI_FLAG_SECURE: return !!(a->flags & 1); case GCRYMPI_FLAG_OPAQUE: return !!(a->flags & 4); case GCRYMPI_FLAG_IMMUTABLE: return !!(a->flags & 16); case GCRYMPI_FLAG_CONST: return !!(a->flags & 32); case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: return !!(a->flags & flag); default: log_bug("invalid flag value\n"); } /*NOTREACHED*/ return 0; } /* Return a constant MPI descripbed by NO which is one of the MPI_C_xxx macros. There is no need to copy this returned value; it may be used directly. */ gcry_mpi_t _gcry_mpi_const (enum gcry_mpi_constants no) { if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS) log_bug("invalid mpi_const selector %d\n", no); if (!constants[no]) log_bug("MPI subsystem not initialized\n"); return constants[no]; } diff --git a/src/const-time.c b/src/const-time.c index 73bf8b40..0fb53a07 100644 --- a/src/const-time.c +++ b/src/const-time.c @@ -1,86 +1,88 @@ /* const-time.c - Constant-time functions * Copyright (C) 2023 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include "g10lib.h" #include "const-time.h" +#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY /* These variables are used to generate masks from conditional operation * flag parameters. Use of volatile prevents compiler optimizations from * converting AND-masking to conditional branches. */ volatile unsigned int _gcry_ct_vzero = 0; volatile unsigned int _gcry_ct_vone = 1; +#endif /* * Compare byte arrays of length LEN, return 1 if it's not same, * 0, otherwise. */ unsigned int _gcry_ct_not_memequal (const void *b1, const void *b2, size_t len) { const byte *a = b1; const byte *b = b2; int ab, ba; size_t i; /* Constant-time compare. */ for (i = 0, ab = 0, ba = 0; i < len; i++) { /* If a[i] != b[i], either ab or ba will be negative. */ ab |= a[i] - b[i]; ba |= b[i] - a[i]; } /* 'ab | ba' is negative when buffers are not equal, extract sign bit. */ return ((unsigned int)(ab | ba) >> (sizeof(unsigned int) * 8 - 1)) & 1; } /* * Compare byte arrays of length LEN, return 0 if it's not same, * 1, otherwise. */ unsigned int _gcry_ct_memequal (const void *b1, const void *b2, size_t len) { return _gcry_ct_not_memequal (b1, b2, len) ^ 1; } /* * Copy LEN bytes from memory area SRC to memory area DST, when * OP_ENABLED=1. When DST <= SRC, the memory areas may overlap. When * DST > SRC, the memory areas must not overlap. 
*/ void _gcry_ct_memmov_cond (void *dst, const void *src, size_t len, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - unsigned char mask1 = _gcry_ct_vzero - op_enable; - unsigned char mask2 = op_enable - _gcry_ct_vone; + unsigned char mask1 = ct_ulong_gen_mask(op_enable); + unsigned char mask2 = ct_ulong_gen_inv_mask(op_enable); unsigned char *b_dst = dst; const unsigned char *b_src = src; size_t i; for (i = 0; i < len; i++) b_dst[i] = (b_dst[i] & mask2) | (b_src[i] & mask1); } diff --git a/src/const-time.h b/src/const-time.h index e324dcb7..fe07cc7a 100644 --- a/src/const-time.h +++ b/src/const-time.h @@ -1,117 +1,167 @@ /* const-time.h - Constant-time functions * Copyright (C) 2023 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef GCRY_CONST_TIME_H #define GCRY_CONST_TIME_H #include "types.h" #define ct_not_memequal _gcry_ct_not_memequal #define ct_memequal _gcry_ct_memequal #define ct_memmov_cond _gcry_ct_memmov_cond +#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY extern volatile unsigned int _gcry_ct_vzero; extern volatile unsigned int _gcry_ct_vone; +#endif /* * Return 0 if A is 0 and return 1 otherwise. */ static inline unsigned int ct_is_not_zero (unsigned int a) { /* Sign bit set if A != 0. */ a = a | (-a); return a >> (sizeof(unsigned int) * 8 - 1); } /* * Return 1 if A is 0 and return 0 otherwise. 
*/ static inline unsigned int ct_is_zero (unsigned int a) { /* Sign bit set if A == 0. */ a = ~a & ~(-a); return a >> (sizeof(unsigned int) * 8 - 1); } /* * Return 1 if it's not same, 0 if same. */ static inline unsigned int ct_not_equal_byte (unsigned char b0, unsigned char b1) { unsigned int diff; diff = b0; diff ^= b1; return (0U - diff) >> (sizeof (unsigned int)*8 - 1); } /* Compare byte-arrays of length LEN, return 1 if it's not same, 0 otherwise. We use pointer of void *, so that it can be used with any structure. */ unsigned int _gcry_ct_not_memequal (const void *b1, const void *b2, size_t len); /* Compare byte-arrays of length LEN, return 0 if it's not same, 1 otherwise. We use pointer of void *, so that it can be used with any structure. */ unsigned int _gcry_ct_memequal (const void *b1, const void *b2, size_t len); +/* + * Return all bits set if A is 1 and return 0 otherwise. + */ +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY +# define DEFINE_CT_TYPE_GEN_MASK(name, type) \ + static inline type \ + ct_##name##_gen_mask (unsigned long op_enable) \ + { \ + type mask = -(type)op_enable; \ + asm volatile ("\n" : "+r" (mask) :: "memory"); \ + return mask; \ + } +#else +# define DEFINE_CT_TYPE_GEN_MASK(name, type) \ + static inline type \ + ct_##name##_gen_mask (unsigned long op_enable) \ + { \ + type mask = (type)_gcry_ct_vzero - (type)op_enable; \ + return mask; \ + } +#endif +DEFINE_CT_TYPE_GEN_MASK(uintptr, uintptr_t) +DEFINE_CT_TYPE_GEN_MASK(ulong, unsigned long) + +/* + * Return all bits set if A is 0 and return 0 otherwise. 
+ */ +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY +# define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \ + static inline type \ + ct_##name##_gen_inv_mask (unsigned long op_enable) \ + { \ + type mask = (type)op_enable - (type)1; \ + asm volatile ("\n" : "+r" (mask) :: "memory"); \ + return mask; \ + } +#else +# define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \ + static inline type \ + ct_##name##_gen_inv_mask (unsigned long op_enable) \ + { \ + type mask = (type)op_enable - (type)_gcry_ct_vone; \ + return mask; \ + } +#endif +DEFINE_CT_TYPE_GEN_INV_MASK(uintptr, uintptr_t) +DEFINE_CT_TYPE_GEN_INV_MASK(ulong, unsigned long) + /* * Return A when OP_ENABLED=1 * otherwise, return B */ #define DEFINE_CT_TYPE_SELECT_FUNC(name, type) \ static inline type \ ct_##name##_select (type a, type b, unsigned long op_enable) \ { \ - type mask_b = (type)op_enable - (type)_gcry_ct_vone; \ - type mask_a = (type)_gcry_ct_vzero - (type)op_enable; \ + type mask_b = ct_##name##_gen_inv_mask(op_enable); \ + type mask_a = ct_##name##_gen_mask(op_enable); \ return (mask_a & a) | (mask_b & b); \ } DEFINE_CT_TYPE_SELECT_FUNC(uintptr, uintptr_t) DEFINE_CT_TYPE_SELECT_FUNC(ulong, unsigned long) /* * Return NULL when OP_ENABLED=1 * otherwise, return W */ static inline gcry_sexp_t sexp_null_cond (gcry_sexp_t w, unsigned long op_enable) { uintptr_t o = ct_uintptr_select((uintptr_t)NULL, (uintptr_t)w, op_enable); return (gcry_sexp_t)(void *)o; } /* * Copy LEN bytes from memory area SRC to memory area DST, when * OP_ENABLED=1. When DST <= SRC, the memory areas may overlap. When * DST > SRC, the memory areas must not overlap. */ void _gcry_ct_memmov_cond (void *dst, const void *src, size_t len, unsigned long op_enable); #endif /*GCRY_CONST_TIME_H*/