diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c index ee6813b1..0114622d 100644 --- a/mpi/mpi-inv.c +++ b/mpi/mpi-inv.c @@ -1,272 +1,465 @@ /* mpi-inv.c - MPI functions * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include "mpi-internal.h" #include "g10lib.h" +/* + * W = U + V when OP_ENABLED=1 + * otherwise, W = U + */ +static mpi_limb_t +mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, + unsigned long op_enable) +{ + mpi_size_t i; + mpi_limb_t cy; + mpi_limb_t mask = ((mpi_limb_t)0) - op_enable; + + cy = 0; + for (i = 0; i < usize; i++) + { + mpi_limb_t x = up[i] + (vp[i] & mask); + mpi_limb_t cy1 = x < up[i]; + mpi_limb_t cy2; + + x = x + cy; + cy2 = x < cy; + cy = cy1 | cy2; + wp[i] = x; + } + + return cy; +} + + +/* + * W = U - V when OP_ENABLED=1 + * otherwise, W = U + */ +static mpi_limb_t +mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, + unsigned long op_enable) +{ + mpi_size_t i; + mpi_limb_t cy; + mpi_limb_t mask = ((mpi_limb_t)0) - op_enable; + + cy = 0; + for (i = 0; i < usize; i++) + { + mpi_limb_t x = up[i] - (vp[i] & mask); + mpi_limb_t cy1 = x > up[i]; + mpi_limb_t cy2; + + cy2 = x < cy; + x = x - cy; + cy = cy1 | cy2; + wp[i] = x; + } + + return cy; +} + + +/* + * Swap value of U and V when OP_ENABLED=1 + * otherwise, no change + */ +static void +mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, + unsigned long op_enable) +{ + mpi_size_t i; + mpi_limb_t mask = ((mpi_limb_t)0) - op_enable; + + for (i = 0; i < usize; i++) + { + mpi_limb_t x = mask & (up[i] ^ vp[i]); + + up[i] = up[i] ^ x; + vp[i] = vp[i] ^ x; + } +} + + +/* + * W = -U when OP_ENABLED=1 + * otherwise, W = U + */ +static void +mpih_abs_cond (mpi_limb_t *wp, const mpi_limb_t *up, mpi_size_t usize, + unsigned long op_enable) +{ + mpi_size_t i; + mpi_limb_t mask = ((mpi_limb_t)0) - op_enable; + mpi_limb_t cy = op_enable; + + for (i = 0; i < usize; i++) + { + mpi_limb_t x = ~up[i] + cy; + + cy = (x < ~up[i]); + wp[i] = up[i] ^ (mask & (x ^ up[i])); + } +} + + +/* + * This uses a modular inversion algorithm designed by Niels Möller + * which was implemented in Nettle. The same algorithm was later also + * adapted to GMP in mpn_sec_invert. + * + * For the description of the algorithm, see Algorithm 5 in Appendix A + * of "Fast Software Polynomial Multiplication on ARM Processors using + * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio + * López, and Ricardo Dahab: + * https://hal.inria.fr/hal-01506572/document + * + * Note that in the reference above, at the line 2 of Algorithm 5, + * initial value of V was described as V:=1 wrongly. It must be V:=0. + */ +static int +mpi_invm_odd (gcry_mpi_t x, gcry_mpi_t a_orig, gcry_mpi_t n) +{ + mpi_size_t nsize; + gcry_mpi_t a, b, n1h; + gcry_mpi_t u; + unsigned int iterations; + mpi_ptr_t ap, bp, n1hp; + mpi_ptr_t up, vp; + int is_gcd_one; + + nsize = n->nlimbs; + + a = mpi_copy (a_orig); + mpi_resize (a, nsize); + ap = a->d; + + b = mpi_copy (n); + bp = b->d; + + u = mpi_alloc_set_ui (1); + mpi_resize (u, nsize); + up = u->d; + + mpi_resize (x, nsize); + x->nlimbs = nsize; + vp = x->d; + memset (vp, 0, nsize * BYTES_PER_MPI_LIMB); + + n1h = mpi_copy (n); + mpi_rshift (n1h, n1h, 1); + mpi_add_ui (n1h, n1h, 1); + mpi_resize (n1h, nsize); + + n1hp = n1h->d; + + iterations = 2 * nsize * BITS_PER_MPI_LIMB; + + while (iterations-- > 0) + { + mpi_limb_t odd_a, odd_u, underflow, borrow; + + odd_a = ap[0] & 1; + + underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a); + mpih_add_n_cond (bp, bp, ap, nsize, underflow); + mpih_abs_cond (ap, ap, nsize, underflow); + mpih_swap_cond (up, vp, nsize, underflow); + + _gcry_mpih_rshift (ap, ap, nsize, 1); + + borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a); + mpih_add_n_cond (up, up, n->d, nsize, borrow); + + odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0; + mpih_add_n_cond (up, up, n1hp, nsize, odd_u); + } + + is_gcd_one = (mpi_cmp_ui (b, 1) == 0); + + mpi_free (n1h); + mpi_free (u); + mpi_free (b); + mpi_free (a); + + return is_gcd_one; +} + /**************** * Calculate the multiplicative inverse X of A mod N * That is: Find the solution x for * 1 = (a*x) mod n */ -int -_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n) +static int +mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n) { #if 0 gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3; gcry_mpi_t ta, tb, tc; u = mpi_copy(a); v = mpi_copy(n); u1 = mpi_alloc_set_ui(1); u2 = mpi_alloc_set_ui(0); u3 = mpi_copy(u); v1 = mpi_alloc_set_ui(0); v2 = mpi_alloc_set_ui(1); v3 = mpi_copy(v); q = mpi_alloc( mpi_get_nlimbs(u)+1 ); t1 = mpi_alloc( mpi_get_nlimbs(u)+1 ); t2 = mpi_alloc( mpi_get_nlimbs(u)+1 ); t3 = mpi_alloc( mpi_get_nlimbs(u)+1 ); while( mpi_cmp_ui( v3, 0 ) ) { mpi_fdiv_q( q, u3, v3 ); mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q); mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3); mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3); mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3); } /* log_debug("result:\n"); log_mpidump("q =", q ); log_mpidump("u1=", u1); log_mpidump("u2=", u2); log_mpidump("u3=", u3); log_mpidump("v1=", v1); log_mpidump("v2=", v2); */ mpi_set(x, u1); mpi_free(u1); mpi_free(u2); mpi_free(u3); mpi_free(v1); mpi_free(v2); mpi_free(v3); mpi_free(q); mpi_free(t1); mpi_free(t2); mpi_free(t3); mpi_free(u); mpi_free(v); #elif 0 /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) * modified according to Michael Penk's solution for Exercise 35 */ /* FIXME: we can simplify this in most cases (see Knuth) */ gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3; unsigned k; int sign; u = mpi_copy(a); v = mpi_copy(n); for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) { mpi_rshift(u, u, 1); mpi_rshift(v, v, 1); } u1 = mpi_alloc_set_ui(1); u2 = mpi_alloc_set_ui(0); u3 = mpi_copy(u); v1 = mpi_copy(v); /* !-- used as const 1 */ v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u ); v3 = mpi_copy(v); if( mpi_test_bit(u, 0) ) { /* u is odd */ t1 = mpi_alloc_set_ui(0); t2 = mpi_alloc_set_ui(1); t2->sign = 1; t3 = mpi_copy(v); t3->sign = !t3->sign; goto Y4; } else { t1 = mpi_alloc_set_ui(1); t2 = mpi_alloc_set_ui(0); t3 = mpi_copy(u); } do { do { if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */ mpi_add(t1, t1, v); mpi_sub(t2, t2, u); } mpi_rshift(t1, t1, 1); mpi_rshift(t2, t2, 1); mpi_rshift(t3, t3, 1); Y4: ; } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */ if( !t3->sign ) { mpi_set(u1, t1); mpi_set(u2, t2); mpi_set(u3, t3); } else { mpi_sub(v1, v, t1); sign = u->sign; u->sign = !u->sign; mpi_sub(v2, u, t2); u->sign = sign; sign = t3->sign; t3->sign = !t3->sign; mpi_set(v3, t3); t3->sign = sign; } mpi_sub(t1, u1, v1); mpi_sub(t2, u2, v2); mpi_sub(t3, u3, v3); if( t1->sign ) { mpi_add(t1, t1, v); mpi_sub(t2, t2, u); } } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */ /* mpi_lshift( u3, k ); */ mpi_set(x, u1); mpi_free(u1); mpi_free(u2); mpi_free(u3); mpi_free(v1); mpi_free(v2); mpi_free(v3); mpi_free(t1); mpi_free(t2); mpi_free(t3); #else /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) * modified according to Michael Penk's solution for Exercise 35 * with further enhancement */ gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3; unsigned k; int sign; int odd ; - if (!mpi_cmp_ui (a, 0)) - return 0; /* Inverse does not exists. */ - if (!mpi_cmp_ui (n, 1)) - return 0; /* Inverse does not exists. */ - u = mpi_copy(a); v = mpi_copy(n); for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) { mpi_rshift(u, u, 1); mpi_rshift(v, v, 1); } odd = mpi_test_bit(v,0); u1 = mpi_alloc_set_ui(1); if( !odd ) u2 = mpi_alloc_set_ui(0); u3 = mpi_copy(u); v1 = mpi_copy(v); if( !odd ) { v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u ); /* U is used as const 1 */ } v3 = mpi_copy(v); if( mpi_test_bit(u, 0) ) { /* u is odd */ t1 = mpi_alloc_set_ui(0); if( !odd ) { t2 = mpi_alloc_set_ui(1); t2->sign = 1; } t3 = mpi_copy(v); t3->sign = !t3->sign; goto Y4; } else { t1 = mpi_alloc_set_ui(1); if( !odd ) t2 = mpi_alloc_set_ui(0); t3 = mpi_copy(u); } do { do { if( !odd ) { if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */ mpi_add(t1, t1, v); mpi_sub(t2, t2, u); } mpi_rshift(t1, t1, 1); mpi_rshift(t2, t2, 1); mpi_rshift(t3, t3, 1); } else { if( mpi_test_bit(t1, 0) ) mpi_add(t1, t1, v); mpi_rshift(t1, t1, 1); mpi_rshift(t3, t3, 1); } Y4: ; } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */ if( !t3->sign ) { mpi_set(u1, t1); if( !odd ) mpi_set(u2, t2); mpi_set(u3, t3); } else { mpi_sub(v1, v, t1); sign = u->sign; u->sign = !u->sign; if( !odd ) mpi_sub(v2, u, t2); u->sign = sign; sign = t3->sign; t3->sign = !t3->sign; mpi_set(v3, t3); t3->sign = sign; } mpi_sub(t1, u1, v1); if( !odd ) mpi_sub(t2, u2, v2); mpi_sub(t3, u3, v3); if( t1->sign ) { mpi_add(t1, t1, v); if( !odd ) mpi_sub(t2, t2, u); } } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */ /* mpi_lshift( u3, k ); */ mpi_set(x, u1); mpi_free(u1); mpi_free(v1); mpi_free(t1); if( !odd ) { mpi_free(u2); mpi_free(v2); mpi_free(t2); } mpi_free(u3); mpi_free(v3); mpi_free(t3); mpi_free(u); mpi_free(v); #endif return 1; } + + +int +_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n) +{ + if (!mpi_cmp_ui (a, 0)) + return 0; /* Inverse does not exists. */ + if (!mpi_cmp_ui (n, 1)) + return 0; /* Inverse does not exists. */ + + if (mpi_test_bit (n, 0) && mpi_cmp (a, n) < 0) + return mpi_invm_odd (x, a, n); + else + return mpi_invm_generic (x, a, n); +}