Add optionally unsafe variant of exp_mod for perf

Attempt to partially solve the performance regression in 3.6.0 without
adding too much code size.

Signed-off-by: Manuel Pégourié-Gonnard <manuel.pegourie-gonnard@arm.com>
2025-03-30 16:20:11 +00:00 · 2024-06-18 12:52:45 +02:00 · 2024-06-18 12:52:45 +02:00 · 75ed58723e
commit 75ed58723e
parent 8fb5df859c
4 changed files with 148 additions and 17 deletions
--- a/include/mbedtls/bignum.h
+++ b/include/mbedtls/bignum.h
@ -44,6 +44,22 @@
        goto cleanup;        \
    } while (0)

+/* Constants to identify whether a value is public or secret.
+ *
+ * Parameters should be named X_public where X is the name of the
+ * corresponding input parameter.
+ *
+ * Implementation should always check using
+ *  if (X_public == MBEDTLS_MPI_IS_PUBLIC) {
+ *      // unsafe path
+ *  } else {
+ *      // safe path
+ *  }
+ * not the other way round, in order to prevent misuse. (That is, if a value
+ * other than the two below is passed, default to the safe path.) */
+#define MBEDTLS_MPI_IS_PUBLIC  0x2a2a
+#define MBEDTLS_MPI_IS_SECRET  0
+
 /*
 * Maximum size MPIs are allowed to grow to in number of limbs.
 */
@ -880,7 +896,38 @@ int mbedtls_mpi_mod_int(mbedtls_mpi_uint *r, const mbedtls_mpi *A,
                        mbedtls_mpi_sint b);

 /**
- * \brief          Perform a sliding-window exponentiation: X = A^E mod N
+ * \brief          Perform a modular exponentiation: X = A^E mod N
+ *
+ * \param X        The destination MPI. This must point to an initialized MPI.
+ *                 This must not alias E or N.
+ * \param A        The base of the exponentiation.
+ *                 This must point to an initialized MPI.
+ * \param E        The exponent MPI. This must point to an initialized MPI.
+ * \param N        The base for the modular reduction. This must point to an
+ *                 initialized MPI.
+ * \param prec_RR  A helper MPI depending solely on \p N which can be used to
+ *                 speed-up multiple modular exponentiations for the same value
+ *                 of \p N. This may be \c NULL. If it is not \c NULL, it must
+ *                 point to an initialized MPI. If it hasn't been used after
+ *                 the call to mbedtls_mpi_init(), this function will compute
+ *                 the helper value and store it in \p prec_RR for reuse on
+ *                 subsequent calls to this function. Otherwise, the function
+ *                 will assume that \p prec_RR holds the helper value set by a
+ *                 previous call to mbedtls_mpi_exp_mod(), and reuse it.
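+ *
+ * \param E_public Set to #MBEDTLS_MPI_IS_PUBLIC to allow a faster path that
+ *                 may leak \p E through memory access patterns; only do
+ *                 this when the value of \p E is public. Any other value,
+ *                 including #MBEDTLS_MPI_IS_SECRET, selects the safe path.
+ *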
+ * \return         \c 0 if successful.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ * \return         #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if \c N is negative or
+ *                 even, or if \c E is negative.
+ * \return         Another negative error code on different kinds of failures.
+ *
+ */
+int mbedtls_mpi_exp_mod_optionally_safe(mbedtls_mpi *X, const mbedtls_mpi *A,
+                                        const mbedtls_mpi *E, const mbedtls_mpi *N,
+                                        mbedtls_mpi *prec_RR, int E_public);
+
+/**
+ * \brief          Perform a modular exponentiation: X = A^E mod N
 *
 * \param X        The destination MPI. This must point to an initialized MPI.
 *                 This must not alias E or N.
--- a/library/bignum.c
+++ b/library/bignum.c
@ -1610,9 +1610,9 @@ int mbedtls_mpi_mod_int(mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_s
    return 0;
 }

-int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *E, const mbedtls_mpi *N,
-                        mbedtls_mpi *prec_RR)
+int mbedtls_mpi_exp_mod_optionally_safe(mbedtls_mpi *X, const mbedtls_mpi *A,
+                                        const mbedtls_mpi *E, const mbedtls_mpi *N,
+                                        mbedtls_mpi *prec_RR, int E_public)
 {
    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;

@ -1695,7 +1695,15 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
    {
        mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N->p);
        mbedtls_mpi_core_to_mont_rep(X->p, X->p, N->p, N->n, mm, RR.p, T);
-        mbedtls_mpi_core_exp_mod(X->p, X->p, N->p, N->n, E->p, E->n, RR.p, T);
+        mbedtls_mpi_core_exp_mod_optionally_safe(X->p,
+                                                 X->p,
+                                                 N->p,
+                                                 N->n,
+                                                 E->p,
+                                                 E->n,
+                                                 RR.p,
+                                                 T,
+                                                 E_public);
        mbedtls_mpi_core_from_mont_rep(X->p, X->p, N->p, N->n, mm, T);
    }

@ -1720,6 +1728,13 @@ cleanup:
    return ret;
 }

+int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
+                        const mbedtls_mpi *E, const mbedtls_mpi *N,
+                        mbedtls_mpi *prec_RR)
+{
+    return mbedtls_mpi_exp_mod_optionally_safe(X, A, E, N, prec_RR, MBEDTLS_MPI_IS_SECRET);
+}
+
 /*
 * Greatest common divisor: G = gcd(A, B)  (HAC 14.54)
 */
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@ -758,14 +758,15 @@ static void exp_mod_precompute_window(const mbedtls_mpi_uint *A,
 * (The difference is that the body in our loop processes a single bit instead
 * of a full window.)
 */
-void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
-                              const mbedtls_mpi_uint *A,
-                              const mbedtls_mpi_uint *N,
-                              size_t AN_limbs,
-                              const mbedtls_mpi_uint *E,
-                              size_t E_limbs,
-                              const mbedtls_mpi_uint *RR,
-                              mbedtls_mpi_uint *T)
+void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X,
+                                              const mbedtls_mpi_uint *A,
+                                              const mbedtls_mpi_uint *N,
+                                              size_t AN_limbs,
+                                              const mbedtls_mpi_uint *E,
+                                              size_t E_limbs,
+                                              const mbedtls_mpi_uint *RR,
+                                              mbedtls_mpi_uint *T,
+                                              int E_public)
 {
    const size_t wsize = exp_mod_get_window_size(E_limbs * biL);
    const size_t welem = ((size_t) 1) << wsize;
@ -803,6 +804,14 @@ void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
     * (limb_index=0, E_bit_index=0). */
    size_t E_limb_index = E_limbs;
    size_t E_bit_index = 0;
+    if (E_public == MBEDTLS_MPI_IS_PUBLIC) {
+        size_t E_bits = mbedtls_mpi_core_bitlen(E, E_limbs);
+        if (E_bits != 0) {
+            E_limb_index = E_bits / biL;
+            E_bit_index = E_bits % biL;
+        }
+    }
+
    /* At any given time, window contains window_bits bits from E.
     * window_bits can go up to wsize. */
    size_t window_bits = 0;
@ -828,10 +837,14 @@ void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
         * when we've finished processing the exponent. */
        if (window_bits == wsize ||
            (E_bit_index == 0 && E_limb_index == 0)) {
-            /* Select Wtable[window] without leaking window through
-             * memory access patterns. */
-            mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
-                                                  AN_limbs, welem, window);
+            if (E_public == MBEDTLS_MPI_IS_PUBLIC) {
+                memcpy(Wselect, Wtable + window * AN_limbs, AN_limbs * ciL);
+            } else {
+                /* Select Wtable[window] without leaking window through
+                 * memory access patterns. */
+                mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
+                                                      AN_limbs, welem, window);
+            }
            /* Multiply X by the selected element. */
            mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm,
                                     temp);
@ -841,6 +854,24 @@ void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
    } while (!(E_bit_index == 0 && E_limb_index == 0));
 }

+void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
+                              const mbedtls_mpi_uint *A,
+                              const mbedtls_mpi_uint *N, size_t AN_limbs,
+                              const mbedtls_mpi_uint *E, size_t E_limbs,
+                              const mbedtls_mpi_uint *RR,
+                              mbedtls_mpi_uint *T)
+{
+    mbedtls_mpi_core_exp_mod_optionally_safe(X,
+                                             A,
+                                             N,
+                                             AN_limbs,
+                                             E,
+                                             E_limbs,
+                                             RR,
+                                             T,
+                                             MBEDTLS_MPI_IS_SECRET);
+}
+
 mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X,
                                          const mbedtls_mpi_uint *A,
                                          mbedtls_mpi_uint c,  /* doubles as carry */
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@ -604,6 +604,44 @@ int mbedtls_mpi_core_random(mbedtls_mpi_uint *X,
 */
 size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs);

+/**
+ * \brief            Perform a modular exponentiation with public or secret exponent:
+ *                   X = A^E mod N, where \p A is already in Montgomery form.
+ *
+ * \p X may be aliased to \p A, but not to \p RR or \p E, even if \p E_limbs ==
+ * \p AN_limbs.
+ *
+ * \param[out] X     The destination MPI, as a little endian array of length
+ *                   \p AN_limbs.
+ * \param[in] A      The base MPI, as a little endian array of length \p AN_limbs.
+ *                   Must be in Montgomery form.
+ * \param[in] N      The modulus, as a little endian array of length \p AN_limbs.
+ * \param AN_limbs   The number of limbs in \p X, \p A, \p N, \p RR.
+ * \param[in] E      The exponent, as a little endian array of length \p E_limbs.
+ * \param E_limbs    The number of limbs in \p E.
+ * \param[in] RR     The precomputed residue of 2^{2*biL} modulo N, as a little
+ *                   endian array of length \p AN_limbs.
+ * \param[in,out] T  Temporary storage of at least the number of limbs returned
+ *                   by `mbedtls_mpi_core_exp_mod_working_limbs()`.
+ *                   Its initial content is unused and its final content is
+ *                   indeterminate.
+ *                   It must not alias or otherwise overlap any of the other
+ *                   parameters.
+ *                   It is up to the caller to zeroize \p T when it is no
+ *                   longer needed, and before freeing it if it was dynamically
+ *                   allocated.
+ * \param E_public   Set to MBEDTLS_MPI_IS_PUBLIC to gain some performance
+ *                   when the value of \p E is public. Any other value,
+ *                   including MBEDTLS_MPI_IS_SECRET, selects the safe path.
+ */
+void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X,
+                                              const mbedtls_mpi_uint *A,
+                                              const mbedtls_mpi_uint *N, size_t AN_limbs,
+                                              const mbedtls_mpi_uint *E, size_t E_limbs,
+                                              const mbedtls_mpi_uint *RR,
+                                              mbedtls_mpi_uint *T,
+                                              int E_public);
+
 /**
 * \brief            Perform a modular exponentiation with secret exponent:
 *                   X = A^E mod N, where \p A is already in Montgomery form.