diff --git a/mpi/ec-nist.c b/mpi/ec-nist.c index 6dfaa1da..3213f9df 100644 --- a/mpi/ec-nist.c +++ b/mpi/ec-nist.c @@ -1,817 +1,817 @@ /* ec-nist.c - NIST optimized elliptic curve functions * Copyright (C) 2021 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #ifndef ASM_DISABLED #include "mpi-internal.h" #include "longlong.h" #include "g10lib.h" #include "context.h" #include "ec-context.h" #include "ec-inline.h" #include "const-time.h" static inline void prefetch(const void *tab, size_t len) { const volatile byte *vtab = tab; if (len > 0 * 64) (void)vtab[0 * 64]; if (len > 1 * 64) (void)vtab[1 * 64]; if (len > 2 * 64) (void)vtab[2 * 64]; if (len > 3 * 64) (void)vtab[3 * 64]; if (len > 4 * 64) (void)vtab[4 * 64]; if (len > 5 * 64) (void)vtab[5 * 64]; if (len > 6 * 64) (void)vtab[6 * 64]; if (len > 7 * 64) (void)vtab[7 * 64]; if (len > 8 * 64) (void)vtab[8 * 64]; if (len > 9 * 64) (void)vtab[9 * 64]; if (len > 10 * 64) (void)vtab[10 * 64]; (void)vtab[len - 1]; } /* Fast reduction routines for NIST curves. 
*/ void _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[3][4] = { { /* P * 1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[192 / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t o[DIM(s)]; const mpi_size_t wsize = DIM(s) - 1; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 192)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.1 Curve P-192". */ s[0] = LOAD64(wp, 3); ADD3_LIMB64 (s[3], s[2], s[1], zero, zero, LOAD64(wp, 3), zero, LOAD64(wp, 4), LOAD64(wp, 4)); ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 5), LOAD64(wp, 5), LOAD64(wp, 5)); ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 2), LOAD64(wp, 1), LOAD64(wp, 0)); /* mod p: * 's[3]' holds carry value (0..2). Subtract (carry + 1) * p. Result will be * with in range -p...p. Handle result being negative with addition and * conditional store. 
*/ carry = LO32_LIMB64(s[3]); SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], p_mult[carry][3], p_mult[carry][2], p_mult[carry][1], p_mult[carry][0]); ADD4_LIMB64 (o[3], o[2], o[1], o[0], s[3], s[2], s[1], s[0], zero, p_mult[0][2], p_mult[0][1], p_mult[0][0]); s_is_negative = LO32_LIMB64(s[3]) >> 31; - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, o[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, o[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, o[2], mask1, s[2]); w->nlimbs = 192 / BITS_PER_MPI_LIMB; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[5][4] = { { /* P * -1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000000U) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU) }, { /* P * 2 */ LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xffffffffU) }, { /* P * 3 */ LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xffffffffU) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(224 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64]; mpi_limb64_t d[DIM(s)]; const mpi_size_t wsize = DIM(s); mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 224)) log_bug ("W must be less than 
m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.2 Curve P-224". */ /* "S1 + S2" with 64-bit limbs: * [0:A10]:[ A9: A8]:[ A7:0]:[0:0] * + [0:0]:[A13:A12]:[A11:0]:[0:0] * => s[3]:s[2]:s[1]:s[0] */ s[0] = zero; ADD3_LIMB64 (s[3], s[2], s[1], LIMB64_HILO(0, LOAD32(wp, 10)), LOAD64(wp, 8 / 2), LIMB64_HILO(LOAD32(wp, 7), 0), zero, LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 11), 0)); /* "T + S1 + S2" */ ADD4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], LIMB64_HILO(0, LOAD32(wp, 6)), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2)); /* "D1 + D2" with 64-bit limbs: * [0:A13]:[A12:A11]:[A10: A9]:[ A8: A7] * + [0:0]:[ 0: 0]:[ 0:A13]:[A12:A11] * => d[3]:d[2]:d[1]:d[0] */ ADD4_LIMB64 (d[3], d[2], d[1], d[0], LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2), LOAD64_UNALIGNED(wp, 9 / 2), LOAD64_UNALIGNED(wp, 7 / 2), zero, zero, LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2)); /* "T + S1 + S2 - D1 - D2" */ SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], d[3], d[2], d[1], d[0]); /* mod p: * Upper 32-bits of 's[3]' holds carry value (-2..2). * Subtract (carry + 1) * p. Result will be with in range -p...p. * Handle result being negative with addition and conditional store. 
*/ carry = HI32_LIMB64(s[3]); SUB4_LIMB64 (s[3], s[2], s[1], s[0], s[3], s[2], s[1], s[0], p_mult[carry + 2][3], p_mult[carry + 2][2], p_mult[carry + 2][1], p_mult[carry + 2][0]); ADD4_LIMB64 (d[3], d[2], d[1], d[0], s[3], s[2], s[1], s[0], p_mult[0 + 2][3], p_mult[0 + 2][2], p_mult[0 + 2][1], p_mult[0 + 2][0]); s_is_negative = (HI32_LIMB64(s[3]) >> 31); - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]); STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[12][5] = { { /* P * -3 */ LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xfffffffdU) }, { /* P * -2 */ LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xfffffffeU) }, { /* P * -1 */ LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000001U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0x00000001U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), 
LIMB64_C(0xfffffffeU, 0x00000002U), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0x00000002U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffdU, 0x00000003U), LIMB64_C(0x00000000U, 0x00000002U) }, { /* P * 4 */ LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0x00000003U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffcU, 0x00000004U), LIMB64_C(0x00000000U, 0x00000003U) }, { /* P * 5 */ LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0x00000004U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffbU, 0x00000005U), LIMB64_C(0x00000000U, 0x00000004U) }, { /* P * 6 */ LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0x00000005U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffaU, 0x00000006U), LIMB64_C(0x00000000U, 0x00000005U) }, { /* P * 7 */ LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0x00000006U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffff9U, 0x00000007U), LIMB64_C(0x00000000U, 0x00000006U) } }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(256 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t t[DIM(s)]; mpi_limb64_t d[DIM(s)]; mpi_limb64_t e[DIM(s)]; const mpi_size_t wsize = DIM(s) - 1; mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t mask3; mpi_limb_t s_is_negative; mpi_limb_t d_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 256)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.3 Curve P-256". 
*/ /* "S1 + S2" with 64-bit limbs: * [A15:A14]:[A13:A12]:[A11:0]:[0:0] * + [0:A15]:[A14:A13]:[A12:0]:[0:0] * => s[4]:s[3]:s[2]:s[1]:s[0] */ s[0] = zero; ADD4_LIMB64 (s[4], s[3], s[2], s[1], zero, LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 11), 0), zero, LIMB64_HILO(0, LOAD32(wp, 15)), LOAD64_UNALIGNED(wp, 13 / 2), LIMB64_HILO(LOAD32(wp, 12), 0)); /* "S3 + S4" with 64-bit limbs: * [A15:A14]:[ 0: 0]:[ 0:A10]:[ A9:A8] * + [A8:A13]:[A15:A14]:[A13:A11]:[A10:A9] * => t[4]:t[3]:t[2]:t[1]:t[0] */ ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 14 / 2), zero, LIMB64_HILO(0, LOAD32(wp, 10)), LOAD64(wp, 8 / 2), zero, LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 13)), LOAD64(wp, 14 / 2), LIMB64_HILO(LOAD32(wp, 13), LOAD32(wp, 11)), LOAD64_UNALIGNED(wp, 9 / 2)); /* "2*S1 + 2*S2" */ ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0]); /* "T + S3 + S4" */ ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0], t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 6 / 2), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2)); /* "2*S1 + 2*S2 - D3" with 64-bit limbs: * s[4]: s[3]: s[2]: s[1]: s[0] * - [A12:0]:[A10:A9]:[A8:A15]:[A14:A13] * => s[4]:s[3]:s[2]:s[1]:s[0] */ SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], zero, LIMB64_HILO(LOAD32(wp, 12), 0), LOAD64_UNALIGNED(wp, 9 / 2), LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 15)), LOAD64_UNALIGNED(wp, 13 / 2)); /* "T + 2*S1 + 2*S2 + S3 + S4 - D3" */ ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], t[4], t[3], t[2], t[1], t[0]); /* "D1 + D2" with 64-bit limbs: * [0:A13]:[A12:A11] + [A15:A14]:[A13:A12] => d[2]:d[1]:d[0] * [A10:A8] + [A11:A9] => d[4]:d[3] */ ADD3_LIMB64 (d[2], d[1], d[0], zero, LIMB64_HILO(0, LOAD32(wp, 13)), LOAD64_UNALIGNED(wp, 11 / 2), zero, LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2)); ADD2_LIMB64 (d[4], d[3], zero, LIMB64_HILO(LOAD32(wp, 10), LOAD32(wp, 8)), zero, LIMB64_HILO(LOAD32(wp, 11), LOAD32(wp, 9))); /* "D1 + 
D2 + D4" with 64-bit limbs: * d[4]: d[3]: d[2]: d[1]: d[0] * - [A13:0]:[A11:A10]:[A9:0]:[A15:A14] * => d[4]:d[3]:d[2]:d[1]:d[0] */ ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0], d[4], d[3], d[2], d[1], d[0], zero, LIMB64_HILO(LOAD32(wp, 13), 0), LOAD64(wp, 10 / 2), LIMB64_HILO(LOAD32(wp, 9), 0), LOAD64(wp, 14 / 2)); /* "T + 2*S1 + 2*S2 + S3 + S4 - D1 - D2 - D3 - D4" */ SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], d[4], d[3], d[2], d[1], d[0]); /* mod p: * 's[4]' holds carry value (-4..6). Subtract (carry + 1) * p. Result * will be with in range -2*p...p. Handle result being negative with * addition and conditional store. */ carry = LO32_LIMB64(s[4]); SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], s[4], s[3], s[2], s[1], s[0], p_mult[carry + 4][4], p_mult[carry + 4][3], p_mult[carry + 4][2], p_mult[carry + 4][1], p_mult[carry + 4][0]); /* Add 1*P */ ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0], s[4], s[3], s[2], s[1], s[0], zero, p_mult[0 + 4][3], p_mult[0 + 4][2], p_mult[0 + 4][1], p_mult[0 + 4][0]); /* Add 2*P */ ADD5_LIMB64 (e[4], e[3], e[2], e[1], e[0], s[4], s[3], s[2], s[1], s[0], zero, p_mult[1 + 4][3], p_mult[1 + 4][2], p_mult[1 + 4][1], p_mult[1 + 4][0]); s_is_negative = LO32_LIMB64(s[4]) >> 31; d_is_negative = LO32_LIMB64(d[4]) >> 31; - mask3 = _gcry_ct_vzero - d_is_negative; - mask2 = (_gcry_ct_vzero - s_is_negative) & ~mask3; - mask1 = (s_is_negative - _gcry_ct_vone) & ~mask3; + mask3 = ct_limb_gen_mask(d_is_negative); + mask2 = ct_limb_gen_mask(s_is_negative) & ~mask3; + mask1 = ct_limb_gen_inv_mask(s_is_negative) & ~mask3; s[0] = LIMB_OR64(MASK_AND64(mask2, d[0]), MASK_AND64(mask1, s[0])); s[1] = LIMB_OR64(MASK_AND64(mask2, d[1]), MASK_AND64(mask1, s[1])); s[2] = LIMB_OR64(MASK_AND64(mask2, d[2]), MASK_AND64(mask1, s[2])); s[3] = LIMB_OR64(MASK_AND64(mask2, d[3]), MASK_AND64(mask1, s[3])); s[0] = LIMB_OR64(MASK_AND64(mask3, e[0]), s[0]); s[1] = LIMB_OR64(MASK_AND64(mask3, e[1]), s[1]); s[2] = LIMB_OR64(MASK_AND64(mask3, e[2]), s[2]); 
s[3] = LIMB_OR64(MASK_AND64(mask3, e[3]), s[3]); STORE64(wp, 0, s[0]); STORE64(wp, 1, s[1]); STORE64(wp, 2, s[2]); STORE64(wp, 3, s[3]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); } void _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx) { static const mpi_limb64_t p_mult[11][7] = { { /* P * -2 */ LIMB64_C(0xfffffffeU, 0x00000002U), LIMB64_C(0x00000001U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffeU) }, { /* P * -1 */ LIMB64_C(0xffffffffU, 0x00000001U), LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0xffffffffU) }, { /* P * 0 */ LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 1 */ LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0xffffffffU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U) }, { /* P * 2 */ LIMB64_C(0x00000001U, 0xfffffffeU), LIMB64_C(0xfffffffeU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U) }, { /* P * 3 */ LIMB64_C(0x00000002U, 0xfffffffdU), LIMB64_C(0xfffffffdU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U) }, { /* P * 4 */ LIMB64_C(0x00000003U, 0xfffffffcU), LIMB64_C(0xfffffffcU, 
0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000003U) }, { /* P * 5 */ LIMB64_C(0x00000004U, 0xfffffffbU), LIMB64_C(0xfffffffbU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000004U) }, { /* P * 6 */ LIMB64_C(0x00000005U, 0xfffffffaU), LIMB64_C(0xfffffffaU, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000005U) }, { /* P * 7 */ LIMB64_C(0x00000006U, 0xfffffff9U), LIMB64_C(0xfffffff9U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff8U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000006U) }, { /* P * 8 */ LIMB64_C(0x00000007U, 0xfffffff8U), LIMB64_C(0xfffffff8U, 0x00000000U), LIMB64_C(0xffffffffU, 0xfffffff7U), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000007U) }, }; const mpi_limb64_t zero = LIMB_TO64(0); mpi_ptr_t wp; mpi_limb64_t s[(384 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64 + 1]; mpi_limb64_t t[DIM(s)]; mpi_limb64_t d[DIM(s)]; mpi_limb64_t x[DIM(s)]; #if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN) mpi_limb_t wp_shr32[(DIM(s) - 1) * LIMBS_PER_LIMB64]; #endif const mpi_size_t wsize = DIM(s) - 1; mpi_size_t psize = ctx->p->nlimbs; mpi_limb_t mask1; mpi_limb_t mask2; mpi_limb_t s_is_negative; int carry; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 384)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64); RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64); 
ctx->p->nlimbs = psize; wp = w->d; prefetch (p_mult, sizeof(p_mult)); /* See "FIPS 186-4, D.2.4 Curve P-384". */ #if BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB # ifdef WORDS_BIGENDIAN # define LOAD64_SHR32(idx) LOAD64(wp_shr32, ((idx) / 2 - wsize)) _gcry_mpih_rshift (wp_shr32, wp + 384 / BITS_PER_MPI_LIMB, wsize * LIMBS_PER_LIMB64, 32); # else # define LOAD64_SHR32(idx) LOAD64_UNALIGNED(wp, idx / 2) #endif #else # define LOAD64_SHR32(idx) LIMB64_HILO(LOAD32(wp, (idx) + 1), LOAD32(wp, idx)) #endif /* "S1 + S1" with 64-bit limbs: * [0:A23]:[A22:A21] * + [0:A23]:[A22:A21] * => s[3]:s[2] */ ADD2_LIMB64 (s[3], s[2], LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21), LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21)); /* "S5 + S6" with 64-bit limbs: * [A23:A22]:[A21:A20]:[ 0:0]:[0: 0] * + [ 0: 0]:[A23:A22]:[A21:0]:[0:A20] * => x[4]:x[3]:x[2]:x[1]:x[0] */ x[0] = LIMB64_HILO(0, LOAD32(wp, 20)); x[1] = LIMB64_HILO(LOAD32(wp, 21), 0); ADD3_LIMB64 (x[4], x[3], x[2], zero, LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2), zero, zero, LOAD64(wp, 22 / 2)); /* "D2 + D3" with 64-bit limbs: * [0:A23]:[A22:A21]:[A20:0] * + [0:A23]:[A23:0]:[0:0] * => d[2]:d[1]:d[0] */ d[0] = LIMB64_HILO(LOAD32(wp, 20), 0); ADD2_LIMB64 (d[2], d[1], LIMB64_HILO(0, LOAD32(wp, 23)), LOAD64_SHR32(21), LIMB64_HILO(0, LOAD32(wp, 23)), LIMB64_HILO(LOAD32(wp, 23), 0)); /* "2*S1 + S5 + S6" with 64-bit limbs: * s[4]:s[3]:s[2]:s[1]:s[0] * + x[4]:x[3]:x[2]:x[1]:x[0] * => s[4]:s[3]:s[2]:s[1]:s[0] */ s[0] = x[0]; s[1] = x[1]; ADD3_LIMB64(s[4], s[3], s[2], zero, s[3], s[2], x[4], x[3], x[2]); /* "T + S2" with 64-bit limbs: * [A11:A10]:[ A9: A8]:[ A7: A6]:[ A5: A4]:[ A3: A2]:[ A1: A0] * + [A23:A22]:[A21:A20]:[A19:A18]:[A17:A16]:[A15:A14]:[A13:A12] * => t[6]:t[5]:t[4]:t[3]:t[2]:t[1]:t[0] */ ADD7_LIMB64 (t[6], t[5], t[4], t[3], t[2], t[1], t[0], zero, LOAD64(wp, 10 / 2), LOAD64(wp, 8 / 2), LOAD64(wp, 6 / 2), LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2), zero, LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2), LOAD64(wp, 18 / 2), 
LOAD64(wp, 16 / 2), LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2)); /* "2*S1 + S4 + S5 + S6" with 64-bit limbs: * s[6]: s[5]: s[4]: s[3]: s[2]: s[1]: s[0] * + [A19:A18]:[A17:A16]:[A15:A14]:[A13:A12]:[A20:0]:[A23:0] * => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0] */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, zero, s[4], s[3], s[2], s[1], s[0], zero, LOAD64(wp, 18 / 2), LOAD64(wp, 16 / 2), LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2), LIMB64_HILO(LOAD32(wp, 20), 0), LIMB64_HILO(LOAD32(wp, 23), 0)); /* "D1 + D2 + D3" with 64-bit limbs: * d[6]: d[5]: d[4]: d[3]: d[2]: d[1]: d[0] * + [A22:A21]:[A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23] * => d[6]:d[5]:d[4]:d[3]:d[2]:d[1]:d[0] */ ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0], zero, zero, zero, zero, d[2], d[1], d[0], zero, LOAD64_SHR32(21), LOAD64_SHR32(19), LOAD64_SHR32(17), LOAD64_SHR32(15), LOAD64_SHR32(13), LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23))); /* "2*S1 + S3 + S4 + S5 + S6" with 64-bit limbs: * s[6]: s[5]: s[4]: s[3]: s[2]: s[1]: s[0] * + [A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23]:[A22:A21] * => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0] */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, LOAD64_SHR32(19), LOAD64_SHR32(17), LOAD64_SHR32(15), LOAD64_SHR32(13), LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23)), LOAD64_SHR32(21)); /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6" */ ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], t[6], t[5], t[4], t[3], t[2], t[1], t[0]); /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6 - D1 - D2 - D3" */ SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], d[6], d[5], d[4], d[3], d[2], d[1], d[0]); #undef LOAD64_SHR32 /* mod p: * 's[6]' holds carry value (-3..7). Subtract (carry + 1) * p. Result * will be with in range -p...p. Handle result being negative with * addition and conditional store. 
*/ carry = LO32_LIMB64(s[6]); SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], p_mult[carry + 3][6], p_mult[carry + 3][5], p_mult[carry + 3][4], p_mult[carry + 3][3], p_mult[carry + 3][2], p_mult[carry + 3][1], p_mult[carry + 3][0]); ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0], s[6], s[5], s[4], s[3], s[2], s[1], s[0], zero, p_mult[0 + 3][5], p_mult[0 + 3][4], p_mult[0 + 3][3], p_mult[0 + 3][2], p_mult[0 + 3][1], p_mult[0 + 3][0]); s_is_negative = LO32_LIMB64(s[6]) >> 31; - mask2 = _gcry_ct_vzero - s_is_negative; - mask1 = s_is_negative - _gcry_ct_vone; + mask2 = ct_limb_gen_mask(s_is_negative); + mask1 = ct_limb_gen_inv_mask(s_is_negative); STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]); STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]); STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]); STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]); STORE64_COND(wp, 4, mask2, d[4], mask1, s[4]); STORE64_COND(wp, 5, mask2, d[5], mask1, s[5]); w->nlimbs = wsize * LIMBS_PER_LIMB64; MPN_NORMALIZE (wp, w->nlimbs); #if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN) wipememory(wp_shr32, sizeof(wp_shr32)); #endif } void _gcry_mpi_ec_nist521_mod (gcry_mpi_t w, mpi_ec_t ctx) { mpi_limb_t s[(521 + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB]; const mpi_size_t wsize = DIM(s); mpi_limb_t cy; mpi_ptr_t wp; MPN_NORMALIZE (w->d, w->nlimbs); if (mpi_nbits_more_than (w, 2 * 521)) log_bug ("W must be less than m^2\n"); RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2); wp = w->d; /* See "FIPS 186-4, D.2.5 Curve P-521". 
*/ _gcry_mpih_rshift (s, wp + wsize - 1, wsize, 521 % BITS_PER_MPI_LIMB); s[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1; wp[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1; _gcry_mpih_add_n (wp, wp, s, wsize); /* "mod p" */ cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize); _gcry_mpih_add_n (s, wp, ctx->p->d, wsize); mpih_set_cond (wp, s, wsize, mpih_limb_is_not_zero (cy)); w->nlimbs = wsize; MPN_NORMALIZE (wp, w->nlimbs); } #endif /* !ASM_DISABLED */ diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h index 70045037..935bf3e1 100644 --- a/mpi/mpi-internal.h +++ b/mpi/mpi-internal.h @@ -1,323 +1,327 @@ /* mpi-internal.h - Internal to the Multi Precision Integers * Copyright (C) 1994, 1996, 1998, 2000, 2002, * 2003 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * SPDX-License-Identifier: LGPL-2.1-or-later * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. 
*/ #ifndef G10_MPI_INTERNAL_H #define G10_MPI_INTERNAL_H #include "mpi-asm-defs.h" #ifndef BITS_PER_MPI_LIMB #if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_INT typedef unsigned int mpi_limb_t; typedef signed int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG typedef unsigned long int mpi_limb_t; typedef signed long int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG typedef unsigned long long int mpi_limb_t; typedef signed long long int mpi_limb_signed_t; #elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_SHORT typedef unsigned short int mpi_limb_t; typedef signed short int mpi_limb_signed_t; #else #error BYTES_PER_MPI_LIMB does not match any C type #endif #define BITS_PER_MPI_LIMB (8*BYTES_PER_MPI_LIMB) #endif /*BITS_PER_MPI_LIMB*/ #include "mpi.h" +#include "const-time.h" /* If KARATSUBA_THRESHOLD is not already defined, define it to a * value which is good on most machines. */ /* tested 4, 16, 32 and 64, where 16 gave the best performance when * checking a 768 and a 1024 bit ElGamal signature. * (wk 22.12.97) */ #ifndef KARATSUBA_THRESHOLD #define KARATSUBA_THRESHOLD 16 #endif /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ #if KARATSUBA_THRESHOLD < 2 #undef KARATSUBA_THRESHOLD #define KARATSUBA_THRESHOLD 2 #endif typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ #define ABS(x) (x >= 0 ? x : -x) #define MIN(l,o) ((l) < (o) ? (l) : (o)) #define MAX(h,i) ((h) > (i) ? (h) : (i)) #define RESIZE_IF_NEEDED(a,b) \ do { \ if( (a)->alloced < (b) ) \ mpi_resize((a), (b)); \ } while(0) #define RESIZE_AND_CLEAR_IF_NEEDED(a,b) \ do { \ if( (a)->nlimbs < (b) ) \ mpi_resize((a), (b)); \ } while(0) /* Copy N limbs from S to D. 
*/ #define MPN_COPY( d, s, n) \ do { \ mpi_size_t _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = (s)[_i]; \ } while(0) #define MPN_COPY_INCR( d, s, n) \ do { \ mpi_size_t _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = (s)[_i]; \ } while (0) #define MPN_COPY_DECR( d, s, n ) \ do { \ mpi_size_t _i; \ for( _i = (n)-1; _i >= 0; _i--) \ (d)[_i] = (s)[_i]; \ } while(0) /* Zero N limbs at D */ #define MPN_ZERO(d, n) \ do { \ int _i; \ for( _i = 0; _i < (n); _i++ ) \ (d)[_i] = 0; \ } while (0) #define MPN_NORMALIZE(d, n) \ do { \ while( (n) > 0 ) { \ if( (d)[(n)-1] ) \ break; \ (n)--; \ } \ } while(0) #define MPN_NORMALIZE_NOT_ZERO(d, n) \ do { \ for(;;) { \ if( (d)[(n)-1] ) \ break; \ (n)--; \ } \ } while(0) #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ do { \ if( (size) < KARATSUBA_THRESHOLD ) \ mul_n_basecase (prodp, up, vp, size); \ else \ mul_n (prodp, up, vp, size, tspace); \ } while (0) /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). * If this would yield overflow, DI should be the largest possible number * (i.e., only ones). For correct operation, the most significant bit of D * has to be set. Put the quotient in Q and the remainder in R. 
*/ #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ do { \ mpi_limb_t _ql GCC_ATTR_UNUSED; \ mpi_limb_t _q, _r; \ mpi_limb_t _xh, _xl; \ umul_ppmm (_q, _ql, (nh), (di)); \ _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ umul_ppmm (_xh, _xl, _q, (d)); \ sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ if( _xh ) { \ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ _q++; \ if( _xh) { \ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ _q++; \ } \ } \ if( _r >= (d) ) { \ _r -= (d); \ _q++; \ } \ (r) = _r; \ (q) = _q; \ } while (0) /*-- mpiutil.c --*/ #define mpi_alloc_limb_space(n,f) _gcry_mpi_alloc_limb_space((n),(f)) mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned nlimbs, int sec ); void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs ); void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned nlimbs ); /*-- mpi-bit.c --*/ #define mpi_rshift_limbs(a,n) _gcry_mpi_rshift_limbs ((a), (n)) #define mpi_lshift_limbs(a,n) _gcry_mpi_lshift_limbs ((a), (n)) void _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ); void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ); /*-- mpih-add.c --*/ mpi_limb_t _gcry_mpih_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb ); mpi_limb_t _gcry_mpih_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); mpi_limb_t _gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpih-sub.c --*/ mpi_limb_t _gcry_mpih_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb ); mpi_limb_t _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); mpi_limb_t _gcry_mpih_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpih-cmp.c --*/ int _gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size ); /*-- mpih-mul.c --*/ struct karatsuba_ctx { struct karatsuba_ctx *next; mpi_ptr_t tspace; 
unsigned int tspace_nlimbs; mpi_size_t tspace_size; mpi_ptr_t tp; unsigned int tp_nlimbs; mpi_size_t tp_size; }; void _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ); mpi_limb_t _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); void _gcry_mpih_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); mpi_limb_t _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize); void _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ); void _gcry_mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace); void _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize, struct karatsuba_ctx *ctx ); /*-- mpih-mul_1.c (or xxx/cpu/ *.S) --*/ mpi_limb_t _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); /*-- mpih-div.c --*/ mpi_limb_t _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, mpi_limb_t divisor_limb); mpi_limb_t _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize); mpi_limb_t _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr, mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, mpi_limb_t divisor_limb); /*-- mpih-shift.c --*/ mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt); mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt); /*-- mpih-const-time.c --*/ #define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o)) #define mpih_add_n_cond(w,u,v,s,o) _gcry_mpih_add_n_cond ((w),(u),(v),(s),(o)) #define mpih_sub_n_cond(w,u,v,s,o) _gcry_mpih_sub_n_cond ((w),(u),(v),(s),(o)) #define mpih_swap_cond(u,v,s,o) _gcry_mpih_swap_cond ((u),(v),(s),(o)) #define 
mpih_abs_cond(w,u,s,o) _gcry_mpih_abs_cond ((w),(u),(s),(o)) #define mpih_mod(v,vs,u,us) _gcry_mpih_mod ((v),(vs),(u),(us)) +DEFINE_CT_TYPE_GEN_MASK(limb, mpi_limb_t) +DEFINE_CT_TYPE_GEN_INV_MASK(limb, mpi_limb_t) + static inline int mpih_limb_is_zero (mpi_limb_t a) { /* Sign bit set if A == 0. */ a = ~a & ~(-a); return a >> (BITS_PER_MPI_LIMB - 1); } static inline int mpih_limb_is_not_zero (mpi_limb_t a) { /* Sign bit set if A != 0. */ a = a | (-a); return a >> (BITS_PER_MPI_LIMB - 1); } void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable); mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable); void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable); mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, mpi_ptr_t up, mpi_size_t usize); int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v); /* Define stuff for longlong.h. 
*/ #define W_TYPE_SIZE BITS_PER_MPI_LIMB typedef mpi_limb_t UWtype; typedef unsigned int UHWtype; #if defined (__GNUC__) typedef unsigned int UQItype __attribute__ ((mode (QI))); typedef int SItype __attribute__ ((mode (SI))); typedef unsigned int USItype __attribute__ ((mode (SI))); typedef int DItype __attribute__ ((mode (DI))); typedef unsigned int UDItype __attribute__ ((mode (DI))); #else typedef unsigned char UQItype; typedef long SItype; typedef unsigned long USItype; #endif #ifdef __GNUC__ #include "mpi-inline.h" #endif #endif /*G10_MPI_INTERNAL_H*/ diff --git a/mpi/mpih-const-time.c b/mpi/mpih-const-time.c index 3d854e8c..4f563cb8 100644 --- a/mpi/mpih-const-time.c +++ b/mpi/mpih-const-time.c @@ -1,217 +1,217 @@ /* mpih-const-time.c - Constant-time MPI helper functions * Copyright (C) 2020 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include "mpi-internal.h" #include "g10lib.h" #include "const-time.h" #define A_LIMB_1 ((mpi_limb_t)1) /* * W = U when OP_ENABLED=1 * otherwise, W keeps old value */ void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; for (i = 0; i < usize; i++) { wp[i] = (wp[i] & mask2) | (up[i] & mask1); } } /* * W = U + V when OP_ENABLED=1 * otherwise, W = U */ mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; mpi_limb_t cy; cy = 0; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = u + vp[i]; mpi_limb_t cy1 = x < u; mpi_limb_t cy2; x = x + cy; cy2 = x < cy; cy = cy1 | cy2; wp[i] = (u & mask2) | (x & mask1); } return cy & mask1; } /* * W = U - V when OP_ENABLED=1 * otherwise, W = U */ mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; mpi_limb_t cy; cy = 0; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = u - vp[i]; mpi_limb_t cy1 = x > u; mpi_limb_t cy2; cy2 = x < cy; x = x - cy; cy = cy1 | cy2; wp[i] = (u 
& mask2) | (x & mask1); } return cy & mask1; } /* * Swap value of U and V when OP_ENABLED=1 * otherwise, no change */ void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_size_t i; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t v = vp[i]; up[i] = (u & mask2) | (v & mask1); vp[i] = (u & mask1) | (v & mask2); } } /* * W = -U when OP_ENABLED=1 * otherwise, W = U */ void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - op_enable; - mpi_limb_t mask2 = op_enable - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(op_enable); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable); mpi_limb_t cy = op_enable; mpi_size_t i; for (i = 0; i < usize; i++) { mpi_limb_t u = up[i]; mpi_limb_t x = ~u + cy; cy = (x < ~u); wp[i] = (u & mask2) | (x & mask1); } } /* * Allocating memory for W, * compute W = V % U, then return W */ mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, mpi_ptr_t up, mpi_size_t usize) { int secure; mpi_ptr_t rp; mpi_size_t i; secure = _gcry_is_secure (vp); rp = mpi_alloc_limb_space (usize, secure); MPN_ZERO (rp, usize); for (i = 0; i < vsize * BITS_PER_MPI_LIMB; i++) { unsigned int j = vsize * BITS_PER_MPI_LIMB - 1 - i; unsigned int limbno = j / BITS_PER_MPI_LIMB; unsigned int bitno = j % BITS_PER_MPI_LIMB; mpi_limb_t limb = vp[limbno]; unsigned int the_bit = ((limb & (A_LIMB_1 << bitno)) ? 
1 : 0); mpi_limb_t underflow; mpi_limb_t overflow; overflow = _gcry_mpih_lshift (rp, rp, usize, 1); rp[0] |= the_bit; underflow = _gcry_mpih_sub_n (rp, rp, up, usize); mpih_add_n_cond (rp, rp, up, usize, overflow ^ underflow); } return rp; } int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v) { int is_all_zero = 1; mpi_size_t i; for (i = 1; i < usize; i++) is_all_zero &= mpih_limb_is_zero (up[i]); if (is_all_zero) { if (up[0] < v) return -1; else if (up[0] > v) return 1; else return 0; } return 1; } diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c index f7506718..07cef257 100644 --- a/mpi/mpiutil.c +++ b/mpi/mpiutil.c @@ -1,792 +1,792 @@ /* mpiutil.ac - Utility functions for MPI * Copyright (C) 1998, 2000, 2001, 2002, 2003, * 2007 Free Software Foundation, Inc. * Copyright (C) 2013 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . 
*/ #include #include #include #include #include "g10lib.h" #include "mpi-internal.h" #include "mod-source-info.h" #include "const-time.h" #if SIZEOF_UNSIGNED_INT == 2 # define MY_UINT_MAX 0xffff /* (visual check: 0123 ) */ #elif SIZEOF_UNSIGNED_INT == 4 # define MY_UINT_MAX 0xffffffff /* (visual check: 01234567 ) */ #elif SIZEOF_UNSIGNED_INT == 8 # define MY_UINT_MAX 0xffffffffffffffff /* (visual check: 0123456789abcdef ) */ #else # error Need MY_UINT_MAX for this limb size #endif /* Constants allocated right away at startup. */ static gcry_mpi_t constants[MPI_NUMBER_OF_CONSTANTS]; const char * _gcry_mpi_get_hw_config (void) { return mod_source_info + 1; } /* Initialize the MPI subsystem. This is called early and allows to do some initialization without taking care of threading issues. */ gcry_err_code_t _gcry_mpi_init (void) { int idx; unsigned long value; for (idx=0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) { switch (idx) { case MPI_C_ZERO: value = 0; break; case MPI_C_ONE: value = 1; break; case MPI_C_TWO: value = 2; break; case MPI_C_THREE: value = 3; break; case MPI_C_FOUR: value = 4; break; case MPI_C_EIGHT: value = 8; break; default: log_bug ("invalid mpi_const selector %d\n", idx); } constants[idx] = mpi_alloc_set_ui (value); constants[idx]->flags = (16|32); } return 0; } /**************** * Note: It was a bad idea to use the number of limbs to allocate * because on a alpha the limbs are large but we normally need * integers of n bits - So we should change this to bits (or bytes). * * But mpi_alloc is used in a lot of places :-(. New code * should use mpi_new. */ gcry_mpi_t _gcry_mpi_alloc( unsigned nlimbs ) { gcry_mpi_t a; a = xmalloc( sizeof *a ); a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 0 ) : NULL; a->alloced = nlimbs; a->nlimbs = 0; a->sign = 0; a->flags = 0; return a; } gcry_mpi_t _gcry_mpi_alloc_secure( unsigned nlimbs ) { gcry_mpi_t a; a = xmalloc( sizeof *a ); a->d = nlimbs? 
mpi_alloc_limb_space( nlimbs, 1 ) : NULL; a->alloced = nlimbs; a->flags = 1; a->nlimbs = 0; a->sign = 0; return a; } mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned int nlimbs, int secure ) { mpi_ptr_t p; size_t len; len = (nlimbs ? nlimbs : 1) * sizeof (mpi_limb_t); p = secure ? xmalloc_secure (len) : xmalloc (len); if (! nlimbs) *p = 0; return p; } void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs) { if (a) { size_t len = nlimbs * sizeof(mpi_limb_t); /* If we have information on the number of allocated limbs, we better wipe that space out. This is a failsafe feature if secure memory has been disabled or was not properly implemented in user provided allocation functions. */ if (len) wipememory (a, len); xfree(a); } } void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned int nlimbs ) { _gcry_mpi_free_limb_space (a->d, a->alloced); a->d = ap; a->alloced = nlimbs; } /**************** * Resize the array of A to NLIMBS. The additional space is cleared * (set to 0). */ void _gcry_mpi_resize (gcry_mpi_t a, unsigned nlimbs) { size_t i; if (nlimbs <= a->alloced) { /* We only need to clear the new space (this is a nop if the limb space is already of the correct size. */ for (i=a->nlimbs; i < a->alloced; i++) a->d[i] = 0; return; } /* Actually resize the limb space. */ if (a->d) { a->d = xrealloc (a->d, nlimbs * sizeof (mpi_limb_t)); for (i=a->nlimbs; i < nlimbs; i++) a->d[i] = 0; } else { if (a->flags & 1) /* Secure memory is wanted. */ a->d = xcalloc_secure (nlimbs , sizeof (mpi_limb_t)); else /* Standard memory. */ a->d = xcalloc (nlimbs , sizeof (mpi_limb_t)); } a->alloced = nlimbs; } void _gcry_mpi_clear( gcry_mpi_t a ) { if (mpi_is_immutable (a)) { mpi_immutable_failed (); return; } a->nlimbs = 0; a->flags = 0; } void _gcry_mpi_free( gcry_mpi_t a ) { if (!a ) return; if ((a->flags & 32)) { #if GPGRT_VERSION_NUMBER >= 0x011600 /* 1.22 */ gpgrt_annotate_leaked_object(a); #endif return; /* Never release a constant. 
*/ } if ((a->flags & 4)) xfree( a->d ); else { _gcry_mpi_free_limb_space(a->d, a->alloced); } /* Check that the flags makes sense. We better allow for bit 1 (value 2) for backward ABI compatibility. */ if ((a->flags & ~(1|2|4|16 |GCRYMPI_FLAG_USER1 |GCRYMPI_FLAG_USER2 |GCRYMPI_FLAG_USER3 |GCRYMPI_FLAG_USER4))) log_bug("invalid flag value in mpi_free\n"); xfree (a); } void _gcry_mpi_immutable_failed (void) { log_info ("Warning: trying to change an immutable MPI\n"); } static void mpi_set_secure( gcry_mpi_t a ) { mpi_ptr_t ap, bp; if ( (a->flags & 1) ) return; a->flags |= 1; ap = a->d; if (!a->nlimbs) { gcry_assert (!ap); return; } bp = mpi_alloc_limb_space (a->alloced, 1); MPN_COPY( bp, ap, a->nlimbs ); a->d = bp; _gcry_mpi_free_limb_space (ap, a->alloced); } gcry_mpi_t _gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits) { if (!a) a = mpi_alloc(0); if (mpi_is_immutable (a)) { mpi_immutable_failed (); return a; } if( a->flags & 4 ) xfree (a->d); else _gcry_mpi_free_limb_space (a->d, a->alloced); a->d = p; a->alloced = 0; a->nlimbs = 0; a->sign = nbits; a->flags = 4 | (a->flags & (GCRYMPI_FLAG_USER1|GCRYMPI_FLAG_USER2 |GCRYMPI_FLAG_USER3|GCRYMPI_FLAG_USER4)); if (_gcry_is_secure (a->d)) a->flags |= 1; return a; } gcry_mpi_t _gcry_mpi_set_opaque_copy (gcry_mpi_t a, const void *p, unsigned int nbits) { void *d; unsigned int n; n = (nbits+7)/8; d = _gcry_is_secure (p)? xtrymalloc_secure (n) : xtrymalloc (n); if (!d) return NULL; memcpy (d, p, n); return mpi_set_opaque (a, d, nbits); } void * _gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits) { if( !(a->flags & 4) ) log_bug("mpi_get_opaque on normal mpi\n"); if( nbits ) *nbits = a->sign; return a->d; } void * _gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits) { const void *s; void *d; unsigned int n; s = mpi_get_opaque (a, nbits); if (!s && nbits) return NULL; n = (*nbits+7)/8; d = _gcry_is_secure (s)? 
xtrymalloc_secure (n) : xtrymalloc (n); if (d) memcpy (d, s, n); return d; } /**************** * Note: This copy function should not interpret the MPI * but copy it transparently. */ gcry_mpi_t _gcry_mpi_copy (gcry_mpi_t a) { int i; gcry_mpi_t b; if( a && (a->flags & 4) ) { void *p = NULL; if (a->sign) { p = _gcry_is_secure(a->d)? xmalloc_secure ((a->sign+7)/8) : xmalloc ((a->sign+7)/8); if (a->d) memcpy( p, a->d, (a->sign+7)/8 ); } b = mpi_set_opaque( NULL, p, a->sign ); b->flags = a->flags; b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ } else if( a ) { b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs ) : mpi_alloc( a->nlimbs ); b->nlimbs = a->nlimbs; b->sign = a->sign; b->flags = a->flags; b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ for(i=0; i < b->nlimbs; i++ ) b->d[i] = a->d[i]; } else b = NULL; return b; } /* Return true if A is negative. */ int _gcry_mpi_is_neg (gcry_mpi_t a) { if (a->sign && _gcry_mpi_cmp_ui (a, 0)) return 1; else return 0; } /* W = - U */ void _gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u) { if (w != u) mpi_set (w, u); else if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } w->sign = !u->sign; } /* W = [W] */ void _gcry_mpi_abs (gcry_mpi_t w) { if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } w->sign = 0; } /**************** * This function allocates an MPI which is optimized to hold * a value as large as the one given in the argument and allocates it * with the same flags as A. */ gcry_mpi_t _gcry_mpi_alloc_like( gcry_mpi_t a ) { gcry_mpi_t b; if( a && (a->flags & 4) ) { int n = (a->sign+7)/8; void *p = _gcry_is_secure(a->d)? xtrymalloc_secure (n) : xtrymalloc (n); memcpy( p, a->d, n ); b = mpi_set_opaque( NULL, p, a->sign ); } else if( a ) { b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs ) : mpi_alloc( a->nlimbs ); b->nlimbs = 0; b->sign = 0; b->flags = a->flags; } else b = NULL; return b; } /* Set U into W and release U. If W is NULL only U will be released. 
*/ void _gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u) { if (w) { if (mpi_is_immutable (w)) { mpi_immutable_failed (); return; } _gcry_mpi_assign_limb_space (w, u->d, u->alloced); w->nlimbs = u->nlimbs; w->sign = u->sign; w->flags = u->flags; u->alloced = 0; u->nlimbs = 0; u->d = NULL; } _gcry_mpi_free (u); } gcry_mpi_t _gcry_mpi_set (gcry_mpi_t w, gcry_mpi_t u) { mpi_ptr_t wp, up; mpi_size_t usize = u->nlimbs; int usign = u->sign; if (!w) w = _gcry_mpi_alloc( mpi_get_nlimbs(u) ); if (mpi_is_immutable (w)) { mpi_immutable_failed (); return w; } RESIZE_IF_NEEDED(w, usize); wp = w->d; up = u->d; MPN_COPY( wp, up, usize ); w->nlimbs = usize; w->flags = u->flags; w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ w->sign = usign; return w; } /**************** * Set the value of W by the one of U, when SET is 1. * Leave the value when SET is 0. * This implementation should be constant-time regardless of SET. */ gcry_mpi_t _gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u, unsigned long set) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - set; - mpi_limb_t mask2 = set - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(set); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(set); mpi_size_t i; mpi_size_t nlimbs = u->alloced; mpi_limb_t xu; mpi_limb_t xw; mpi_limb_t *uu = u->d; mpi_limb_t *uw = w->d; if (w->alloced != u->alloced) log_bug ("mpi_set_cond: different sizes\n"); for (i = 0; i < nlimbs; i++) { xu = uu[i]; xw = uw[i]; uw[i] = (xw & mask2) | (xu & mask1); } xu = u->nlimbs; xw = w->nlimbs; w->nlimbs = (xw & mask2) | (xu & mask1); xu = u->sign; xw = w->sign; w->sign = (xw & mask2) | (xu & mask1); return w; } gcry_mpi_t _gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u) { if (!w) w = _gcry_mpi_alloc (1); /* FIXME: If U is 0 we have no need to resize and thus possible allocating the the limbs. 
*/ if (mpi_is_immutable (w)) { mpi_immutable_failed (); return w; } RESIZE_IF_NEEDED(w, 1); w->d[0] = u; w->nlimbs = u? 1:0; w->sign = 0; w->flags = 0; return w; } /* If U is non-negative and small enough store it as an unsigned int * at W. If the value does not fit into an unsigned int or is * negative return GPG_ERR_ERANGE. Note that we return an unsigned * int so that the value can be used with the bit test functions; in * contrast the other _ui functions take an unsigned long so that on * some platforms they may accept a larger value. On error the value * at W is not changed. */ gcry_err_code_t _gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u) { mpi_limb_t x; if (u->nlimbs > 1 || u->sign) return GPG_ERR_ERANGE; x = (u->nlimbs == 1) ? u->d[0] : 0; if (sizeof (x) > sizeof (unsigned int) && x > MY_UINT_MAX) return GPG_ERR_ERANGE; *w = x; return 0; } gcry_mpi_t _gcry_mpi_alloc_set_ui( unsigned long u) { gcry_mpi_t w = mpi_alloc(1); w->d[0] = u; w->nlimbs = u? 1:0; w->sign = 0; return w; } void _gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b) { struct gcry_mpi tmp; tmp = *a; *a = *b; *b = tmp; } /**************** * Swap the value of A and B, when SWAP is 1. * Leave the value when SWAP is 0. * This implementation should be constant-time regardless of SWAP. 
*/ void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - mpi_limb_t mask1 = _gcry_ct_vzero - swap; - mpi_limb_t mask2 = swap - _gcry_ct_vone; + mpi_limb_t mask1 = ct_limb_gen_mask(swap); + mpi_limb_t mask2 = ct_limb_gen_inv_mask(swap); mpi_size_t i; mpi_size_t nlimbs; mpi_limb_t *ua = a->d; mpi_limb_t *ub = b->d; mpi_limb_t xa; mpi_limb_t xb; if (a->alloced > b->alloced) nlimbs = b->alloced; else nlimbs = a->alloced; if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) log_bug ("mpi_swap_cond: different sizes\n"); for (i = 0; i < nlimbs; i++) { xa = ua[i]; xb = ub[i]; ua[i] = (xa & mask2) | (xb & mask1); ub[i] = (xa & mask1) | (xb & mask2); } xa = a->nlimbs; xb = b->nlimbs; a->nlimbs = (xa & mask2) | (xb & mask1); b->nlimbs = (xa & mask1) | (xb & mask2); xa = a->sign; xb = b->sign; a->sign = (xa & mask2) | (xb & mask1); b->sign = (xa & mask1) | (xb & mask2); } /**************** * Set bit N of A, when SET is 1. * This implementation should be constant-time regardless of SET. */ void _gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set) { unsigned int limbno, bitno; mpi_limb_t set_the_bit = !!set; limbno = n / BITS_PER_MPI_LIMB; bitno = n % BITS_PER_MPI_LIMB; a->d[limbno] |= (set_the_bit<flags |= (16|32); break; case GCRYMPI_FLAG_IMMUTABLE: a->flags |= 16; break; case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: a->flags |= flag; break; case GCRYMPI_FLAG_OPAQUE: default: log_bug("invalid flag value\n"); } } void _gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) { (void)a; /* Not yet used. 
*/ switch (flag) { case GCRYMPI_FLAG_IMMUTABLE: if (!(a->flags & 32)) a->flags &= ~16; break; case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: a->flags &= ~flag; break; case GCRYMPI_FLAG_CONST: case GCRYMPI_FLAG_SECURE: case GCRYMPI_FLAG_OPAQUE: default: log_bug("invalid flag value\n"); } } int _gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) { switch (flag) { case GCRYMPI_FLAG_SECURE: return !!(a->flags & 1); case GCRYMPI_FLAG_OPAQUE: return !!(a->flags & 4); case GCRYMPI_FLAG_IMMUTABLE: return !!(a->flags & 16); case GCRYMPI_FLAG_CONST: return !!(a->flags & 32); case GCRYMPI_FLAG_USER1: case GCRYMPI_FLAG_USER2: case GCRYMPI_FLAG_USER3: case GCRYMPI_FLAG_USER4: return !!(a->flags & flag); default: log_bug("invalid flag value\n"); } /*NOTREACHED*/ return 0; } /* Return a constant MPI descripbed by NO which is one of the MPI_C_xxx macros. There is no need to copy this returned value; it may be used directly. */ gcry_mpi_t _gcry_mpi_const (enum gcry_mpi_constants no) { if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS) log_bug("invalid mpi_const selector %d\n", no); if (!constants[no]) log_bug("MPI subsystem not initialized\n"); return constants[no]; } diff --git a/src/const-time.c b/src/const-time.c index 73bf8b40..0fb53a07 100644 --- a/src/const-time.c +++ b/src/const-time.c @@ -1,86 +1,88 @@ /* const-time.c - Constant-time functions * Copyright (C) 2023 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include "g10lib.h" #include "const-time.h" +#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY /* These variables are used to generate masks from conditional operation * flag parameters. Use of volatile prevents compiler optimizations from * converting AND-masking to conditional branches. */ volatile unsigned int _gcry_ct_vzero = 0; volatile unsigned int _gcry_ct_vone = 1; +#endif /* * Compare byte arrays of length LEN, return 1 if it's not same, * 0, otherwise. */ unsigned int _gcry_ct_not_memequal (const void *b1, const void *b2, size_t len) { const byte *a = b1; const byte *b = b2; int ab, ba; size_t i; /* Constant-time compare. */ for (i = 0, ab = 0, ba = 0; i < len; i++) { /* If a[i] != b[i], either ab or ba will be negative. */ ab |= a[i] - b[i]; ba |= b[i] - a[i]; } /* 'ab | ba' is negative when buffers are not equal, extract sign bit. */ return ((unsigned int)(ab | ba) >> (sizeof(unsigned int) * 8 - 1)) & 1; } /* * Compare byte arrays of length LEN, return 0 if it's not same, * 1, otherwise. */ unsigned int _gcry_ct_memequal (const void *b1, const void *b2, size_t len) { return _gcry_ct_not_memequal (b1, b2, len) ^ 1; } /* * Copy LEN bytes from memory area SRC to memory area DST, when * OP_ENABLED=1. When DST <= SRC, the memory areas may overlap. When * DST > SRC, the memory areas must not overlap. 
*/ void _gcry_ct_memmov_cond (void *dst, const void *src, size_t len, unsigned long op_enable) { /* Note: dual mask with AND/OR used for EM leakage mitigation */ - unsigned char mask1 = _gcry_ct_vzero - op_enable; - unsigned char mask2 = op_enable - _gcry_ct_vone; + unsigned char mask1 = ct_ulong_gen_mask(op_enable); + unsigned char mask2 = ct_ulong_gen_inv_mask(op_enable); unsigned char *b_dst = dst; const unsigned char *b_src = src; size_t i; for (i = 0; i < len; i++) b_dst[i] = (b_dst[i] & mask2) | (b_src[i] & mask1); } diff --git a/src/const-time.h b/src/const-time.h index e324dcb7..fe07cc7a 100644 --- a/src/const-time.h +++ b/src/const-time.h @@ -1,117 +1,167 @@ /* const-time.h - Constant-time functions * Copyright (C) 2023 g10 Code GmbH * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #ifndef GCRY_CONST_TIME_H #define GCRY_CONST_TIME_H #include "types.h" #define ct_not_memequal _gcry_ct_not_memequal #define ct_memequal _gcry_ct_memequal #define ct_memmov_cond _gcry_ct_memmov_cond +#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY extern volatile unsigned int _gcry_ct_vzero; extern volatile unsigned int _gcry_ct_vone; +#endif /* * Return 0 if A is 0 and return 1 otherwise. */ static inline unsigned int ct_is_not_zero (unsigned int a) { /* Sign bit set if A != 0. */ a = a | (-a); return a >> (sizeof(unsigned int) * 8 - 1); } /* * Return 1 if A is 0 and return 0 otherwise. 
*/ static inline unsigned int ct_is_zero (unsigned int a) { /* Sign bit set if A == 0. */ a = ~a & ~(-a); return a >> (sizeof(unsigned int) * 8 - 1); } /* * Return 1 if it's not same, 0 if same. */ static inline unsigned int ct_not_equal_byte (unsigned char b0, unsigned char b1) { unsigned int diff; diff = b0; diff ^= b1; return (0U - diff) >> (sizeof (unsigned int)*8 - 1); } /* Compare byte-arrays of length LEN, return 1 if it's not same, 0 otherwise. We use pointer of void *, so that it can be used with any structure. */ unsigned int _gcry_ct_not_memequal (const void *b1, const void *b2, size_t len); /* Compare byte-arrays of length LEN, return 0 if it's not same, 1 otherwise. We use pointer of void *, so that it can be used with any structure. */ unsigned int _gcry_ct_memequal (const void *b1, const void *b2, size_t len); +/* + * Return all bits set if A is 1 and return 0 otherwise. + */ +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY +# define DEFINE_CT_TYPE_GEN_MASK(name, type) \ + static inline type \ + ct_##name##_gen_mask (unsigned long op_enable) \ + { \ + type mask = -(type)op_enable; \ + asm volatile ("\n" : "+r" (mask) :: "memory"); \ + return mask; \ + } +#else +# define DEFINE_CT_TYPE_GEN_MASK(name, type) \ + static inline type \ + ct_##name##_gen_mask (unsigned long op_enable) \ + { \ + type mask = (type)_gcry_ct_vzero - (type)op_enable; \ + return mask; \ + } +#endif +DEFINE_CT_TYPE_GEN_MASK(uintptr, uintptr_t) +DEFINE_CT_TYPE_GEN_MASK(ulong, unsigned long) + +/* + * Return all bits set if A is 0 and return 0 otherwise. 
+ */ +#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY +# define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \ + static inline type \ + ct_##name##_gen_inv_mask (unsigned long op_enable) \ + { \ + type mask = (type)op_enable - (type)1; \ + asm volatile ("\n" : "+r" (mask) :: "memory"); \ + return mask; \ + } +#else +# define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \ + static inline type \ + ct_##name##_gen_inv_mask (unsigned long op_enable) \ + { \ + type mask = (type)op_enable - (type)_gcry_ct_vone; \ + return mask; \ + } +#endif +DEFINE_CT_TYPE_GEN_INV_MASK(uintptr, uintptr_t) +DEFINE_CT_TYPE_GEN_INV_MASK(ulong, unsigned long) + /* * Return A when OP_ENABLED=1 * otherwise, return B */ #define DEFINE_CT_TYPE_SELECT_FUNC(name, type) \ static inline type \ ct_##name##_select (type a, type b, unsigned long op_enable) \ { \ - type mask_b = (type)op_enable - (type)_gcry_ct_vone; \ - type mask_a = (type)_gcry_ct_vzero - (type)op_enable; \ + type mask_b = ct_##name##_gen_inv_mask(op_enable); \ + type mask_a = ct_##name##_gen_mask(op_enable); \ return (mask_a & a) | (mask_b & b); \ } DEFINE_CT_TYPE_SELECT_FUNC(uintptr, uintptr_t) DEFINE_CT_TYPE_SELECT_FUNC(ulong, unsigned long) /* * Return NULL when OP_ENABLED=1 * otherwise, return W */ static inline gcry_sexp_t sexp_null_cond (gcry_sexp_t w, unsigned long op_enable) { uintptr_t o = ct_uintptr_select((uintptr_t)NULL, (uintptr_t)w, op_enable); return (gcry_sexp_t)(void *)o; } /* * Copy LEN bytes from memory area SRC to memory area DST, when * OP_ENABLED=1. When DST <= SRC, the memory areas may overlap. When * DST > SRC, the memory areas must not overlap. */ void _gcry_ct_memmov_cond (void *dst, const void *src, size_t len, unsigned long op_enable); #endif /*GCRY_CONST_TIME_H*/