From 5c8ea307b878a9860996972c5890c53d8655c525 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Sat, 1 Jan 2022 06:01:45 +0000
Subject: [PATCH 01/21] Reduce number of local MPIs in ECP mixed point addition

`ecp_add_mixed()` and `ecp_double_jac()` are the core subroutines
for elliptic curve arithmetic, and as such crucial for the performance
of ECP primitives like ECDHE and ECDSA.

This commit slightly simplifies `ecp_add_mixed()` and slightly improves
its performance and memory usage by removing three temporary MPIs that
were used for coordinate calculations.

Where those variables were used, the code now writes directly to the
coordinate MPIs of the target elliptic curve point.

This is a valid change even if there is aliasing between input and
output, since at the time any of the coordinate MPIs in question is
written, the corresponding coordinates of both inputs are no longer
read.

(The analogous change in `ecp_double_jac()` cannot be made since
this property does not hold for `ecp_double_jac()`.)

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 0212069c83..ba6e8f33be 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1464,7 +1464,10 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi T1, T2, T3, T4, X, Y, Z;
+    mbedtls_mpi T1, T2, T3, T4;
+    mbedtls_mpi * const X = &R->X;
+    mbedtls_mpi * const Y = &R->Y;
+    mbedtls_mpi * const Z = &R->Z;
 
     /*
      * Trivial cases: P == 0 or Q == 0 (case 1)
@@ -1482,7 +1485,6 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
     mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 ); mbedtls_mpi_init( &T3 ); mbedtls_mpi_init( &T4 );
-    mbedtls_mpi_init( &X ); mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &Z );
 
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T1,  &P->Z,  &P->Z ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T2,  &T1,    &P->Z ) );
@@ -1506,28 +1508,23 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         }
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &Z,   &P->Z,  &T1   ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, Z,    &P->Z,  &T1   ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T1,    &T1   ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T4,  &T3,    &T1   ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T3,    &P->X ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T1, &T3 ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &T1,  1     ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &X,   &T2,    &T2   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &X,   &X,     &T1   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &X,   &X,     &T4   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T3,  &T3,    &X    ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X,    &T2,    &T2   ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X,    X,      &T1   ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X,    X,      &T4   ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T3,  &T3,    X     ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T3,    &T2   ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T4,  &T4,    &P->Y ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &Y,   &T3,    &T4   ) );
-
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->X, &X ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Y, &Y ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Z, &Z ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, Y,    &T3,    &T4   ) );
 
 cleanup:
 
     mbedtls_mpi_free( &T1 ); mbedtls_mpi_free( &T2 ); mbedtls_mpi_free( &T3 ); mbedtls_mpi_free( &T4 );
-    mbedtls_mpi_free( &X ); mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &Z );
 
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_ADD_MIXED_ALT) */

From 02b35bd00a0b2c100975b0b0575a74ec5250324b Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Sat, 1 Jan 2022 06:54:25 +0000
Subject: [PATCH 02/21] Introduce wrapper for modular multiplication with
 single-width const

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index ba6e8f33be..0fd570ef8e 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1138,6 +1138,19 @@ cleanup:
     return( ret );
 }
 
+static inline int mbedtls_mpi_mul_int_mod( const mbedtls_ecp_group *grp,
+                                           mbedtls_mpi *X,
+                                           const mbedtls_mpi *A,
+                                           mbedtls_mpi_uint c )
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( X, A, c ) );
+    MOD_ADD( *X );
+cleanup:
+    return( ret );
+}
+
 #if defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED) && \
     !( defined(MBEDTLS_ECP_NO_FALLBACK) && \
        defined(MBEDTLS_ECP_DOUBLE_JAC_ALT) && \
@@ -1372,17 +1385,17 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     if( grp->A.p == NULL )
     {
         /* M = 3(X + Z^2)(X - Z^2) */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &P->Z,  &P->Z   ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &T,  &P->X,  &S      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &U,  &P->X,  &S      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &T,     &U      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M,  &S,     3       ) ); MOD_ADD( M );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod(     grp, &S,  &P->Z,  &P->Z   ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod(     grp, &T,  &P->X,  &S      ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod(     grp, &U,  &P->X,  &S      ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod(     grp, &S,  &T,     &U      ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, &M,  &S,     3       ) );
     }
     else
     {
         /* M = 3.X^2 */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &P->X,  &P->X   ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M,  &S,     3       ) ); MOD_ADD( M );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod    ( grp, &S,  &P->X,  &P->X   ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, &M,  &S,     3       ) );
 
         /* Optimize away for "koblitz" curves with A = 0 */
         if( mbedtls_mpi_cmp_int( &grp->A, 0 ) != 0 )

From 76f897d6994ca95c64edf527127ecb9b82a0d071 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Sun, 2 Jan 2022 12:47:34 +0000
Subject: [PATCH 03/21] Reduce number of temporary MPIs in ECP normalization

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 0fd570ef8e..aec67870c5 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1194,30 +1194,25 @@ static int ecp_normalize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi Zi, ZZi;
-    mbedtls_mpi_init( &Zi ); mbedtls_mpi_init( &ZZi );
+    mbedtls_mpi T;
+    mbedtls_mpi_init( &T );
 
-    /*
-     * X = X / Z^2  mod p
-     */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &Zi,      &pt->Z,     &grp->P ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &ZZi,     &Zi,        &Zi     ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->X,   &pt->X,     &ZZi    ) );
+    /* T   <-          1 / Z   */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &T,     &pt->Z, &grp->P ) );
+    /* Y'  <- Y*T    = Y / Z   */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y, &pt->Y, &T ) );
+    /* T   <- T^2    = 1 / Z^2 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T,     &T,     &T ) );
+    /* X   <- X  * T = X / Z^2 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->X, &pt->X, &T ) );
+    /* Y'' <- Y' * T = Y / Z^3 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y, &pt->Y, &T ) );
 
-    /*
-     * Y = Y / Z^3  mod p
-     */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y,   &pt->Y,     &ZZi    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y,   &pt->Y,     &Zi     ) );
-
-    /*
-     * Z = 1
-     */
     MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &pt->Z, 1 ) );
 
 cleanup:
 
-    mbedtls_mpi_free( &Zi ); mbedtls_mpi_free( &ZZi );
+    mbedtls_mpi_free( &T );
 
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_NORMALIZE_JAC_ALT) */

From ce29ae84dd425877178b81ee9fdf1491ef623943 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 04:55:11 +0000
Subject: [PATCH 04/21] Introduce macro wrappers for ECC modular arithmetic

This improves readability and prepares for further changes
like the introduction of a single double-width temporary for
ECP arithmetic.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 255 +++++++++++++++++++++++++++++---------------------
 1 file changed, 146 insertions(+), 109 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index aec67870c5..dcfe949839 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1093,9 +1093,11 @@ cleanup:
  * Reduce a mbedtls_mpi mod p in-place, to use after mbedtls_mpi_sub_mpi
  * N->s < 0 is a very fast test, which fails only if N is 0
  */
-#define MOD_SUB( N )                                                    \
-    while( (N).s < 0 && mbedtls_mpi_cmp_int( &(N), 0 ) != 0 )           \
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &(N), &(N), &grp->P ) )
+#define MOD_SUB( N )                                                          \
+    do {                                                                      \
+        while( (N)->s < 0 && mbedtls_mpi_cmp_int( (N), 0 ) != 0 )             \
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( (N), (N), &grp->P ) );      \
+    } while( 0 )
 
 #if ( defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED) && \
       !( defined(MBEDTLS_ECP_NO_FALLBACK) && \
@@ -1111,7 +1113,7 @@ static inline int mbedtls_mpi_sub_mod( const mbedtls_ecp_group *grp,
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( X, A, B ) );
-    MOD_SUB( *X );
+    MOD_SUB( X );
 cleanup:
     return( ret );
 }
@@ -1122,9 +1124,9 @@ cleanup:
  * We known P, N and the result are positive, so sub_abs is correct, and
  * a bit faster.
  */
-#define MOD_ADD( N )                                                    \
-    while( mbedtls_mpi_cmp_mpi( &(N), &grp->P ) >= 0 )                  \
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( &(N), &(N), &grp->P ) )
+#define MOD_ADD( N )                                                   \
+    while( mbedtls_mpi_cmp_mpi( (N), &grp->P ) >= 0 )                  \
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( (N), (N), &grp->P ) )
 
 static inline int mbedtls_mpi_add_mod( const mbedtls_ecp_group *grp,
                                        mbedtls_mpi *X,
@@ -1133,7 +1135,7 @@ static inline int mbedtls_mpi_add_mod( const mbedtls_ecp_group *grp,
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, A, B ) );
-    MOD_ADD( *X );
+    MOD_ADD( X );
 cleanup:
     return( ret );
 }
@@ -1146,11 +1148,27 @@ static inline int mbedtls_mpi_mul_int_mod( const mbedtls_ecp_group *grp,
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( X, A, c ) );
-    MOD_ADD( *X );
+    MOD_ADD( X );
 cleanup:
     return( ret );
 }
 
+static inline int mbedtls_mpi_sub_int_mod( const mbedtls_ecp_group *grp,
+                                           mbedtls_mpi *X,
+                                           const mbedtls_mpi *A,
+                                           mbedtls_mpi_uint c )
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( X, A, c ) );
+    MOD_SUB( X );
+cleanup:
+    return( ret );
+}
+
+#define MPI_ECP_SUB_INT( X, A, c )             \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int_mod( grp, X, A, c ) )
+
 #if defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED) && \
     !( defined(MBEDTLS_ECP_NO_FALLBACK) && \
        defined(MBEDTLS_ECP_DOUBLE_JAC_ALT) && \
@@ -1161,12 +1179,33 @@ static inline int mbedtls_mpi_shift_l_mod( const mbedtls_ecp_group *grp,
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( X, count ) );
-    MOD_ADD( *X );
+    MOD_ADD( X );
 cleanup:
     return( ret );
 }
 #endif /* All functions referencing mbedtls_mpi_shift_l_mod() are alt-implemented without fallback */
 
+#define MPI_ECP_ADD( X, A, B )                                      \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, X, A, B ) )
+
+#define MPI_ECP_SUB( X, A, B )                                      \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X, A, B ) )
+
+#define MPI_ECP_MUL( X, A, B )                                      \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X, A, B ) )
+
+#define MPI_ECP_MUL_INT( X, A, c )                                  \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, X, A, c ) )
+
+#define MPI_ECP_INV( dst, src )                                     \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( (dst), (src), &grp->P ) )
+
+#define MPI_ECP_MOV( X, A )                                         \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) )
+
+#define MPI_ECP_SHIFT_L( X, count )                                 \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, X, count ) )
+
 #if defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED)
 /*
  * For curves in short Weierstrass form, we do all the internal operations in
@@ -1197,16 +1236,11 @@ static int ecp_normalize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     mbedtls_mpi T;
     mbedtls_mpi_init( &T );
 
-    /* T   <-          1 / Z   */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &T,     &pt->Z, &grp->P ) );
-    /* Y'  <- Y*T    = Y / Z   */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y, &pt->Y, &T ) );
-    /* T   <- T^2    = 1 / Z^2 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T,     &T,     &T ) );
-    /* X   <- X  * T = X / Z^2 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->X, &pt->X, &T ) );
-    /* Y'' <- Y' * T = Y / Z^3 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y, &pt->Y, &T ) );
+    MPI_ECP_INV( &T,       &pt->Z );          /* T   <-          1 / Z   */
+    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &T );  /* Y'  <- Y*T    = Y / Z   */
+    MPI_ECP_MUL( &T,       &T,         &T );  /* T   <- T^2    = 1 / Z^2 */
+    MPI_ECP_MUL( &pt->X,   &pt->X,     &T );  /* X   <- X  * T = X / Z^2 */
+    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &T );  /* Y'' <- Y' * T = Y / Z^3 */
 
     MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &pt->Z, 1 ) );
 
@@ -1258,16 +1292,16 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     /*
      * c[i] = Z_0 * ... * Z_i
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &c[0], &T[0]->Z ) );
+    MPI_ECP_MOV( &c[0], &T[0]->Z );
     for( i = 1; i < T_size; i++ )
     {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &c[i], &c[i-1], &T[i]->Z ) );
+        MPI_ECP_MUL( &c[i], &c[i-1], &T[i]->Z );
     }
 
     /*
      * u = 1 / (Z_0 * ... * Z_n) mod P
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &u, &c[T_size-1], &grp->P ) );
+    MPI_ECP_INV( &u, &c[T_size-1] );
 
     for( i = T_size - 1; ; i-- )
     {
@@ -1275,22 +1309,23 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
          * Zi = 1 / Z_i mod p
          * u = 1 / (Z_0 * ... * Z_i) mod P
          */
-        if( i == 0 ) {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Zi, &u ) );
+        if( i == 0 )
+        {
+            MPI_ECP_MOV( &Zi, &u );
         }
         else
         {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &Zi, &u, &c[i-1]  ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &u,  &u, &T[i]->Z ) );
+            MPI_ECP_MUL( &Zi, &u, &c[i-1]  );
+            MPI_ECP_MUL( &u,  &u, &T[i]->Z );
         }
 
         /*
          * proceed as in normalize()
          */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &ZZi,     &Zi,      &Zi  ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T[i]->X, &T[i]->X, &ZZi ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T[i]->Y, &T[i]->Y, &ZZi ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T[i]->Y, &T[i]->Y, &Zi  ) );
+        MPI_ECP_MUL( &ZZi,     &Zi,      &Zi  );
+        MPI_ECP_MUL( &T[i]->X, &T[i]->X, &ZZi );
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &ZZi );
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi  );
 
         /*
          * Post-precessing: reclaim some memory by shrinking coordinates
@@ -1380,52 +1415,52 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     if( grp->A.p == NULL )
     {
         /* M = 3(X + Z^2)(X - Z^2) */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod(     grp, &S,  &P->Z,  &P->Z   ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod(     grp, &T,  &P->X,  &S      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod(     grp, &U,  &P->X,  &S      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod(     grp, &S,  &T,     &U      ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, &M,  &S,     3       ) );
+        MPI_ECP_MUL(     &S,  &P->Z,  &P->Z   );
+        MPI_ECP_ADD(     &T,  &P->X,  &S      );
+        MPI_ECP_SUB(     &U,  &P->X,  &S      );
+        MPI_ECP_MUL(     &S,  &T,     &U      );
+        MPI_ECP_MUL_INT( &M,  &S,     3       );
     }
     else
     {
         /* M = 3.X^2 */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod    ( grp, &S,  &P->X,  &P->X   ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, &M,  &S,     3       ) );
+        MPI_ECP_MUL(     &S,  &P->X,  &P->X   );
+        MPI_ECP_MUL_INT( &M,  &S,     3       );
 
         /* Optimize away for "koblitz" curves with A = 0 */
         if( mbedtls_mpi_cmp_int( &grp->A, 0 ) != 0 )
         {
             /* M += A.Z^4 */
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &P->Z,  &P->Z   ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T,  &S,     &S      ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &T,     &grp->A ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &M,  &M,     &S      ) );
+            MPI_ECP_MUL( &S,  &P->Z,  &P->Z   );
+            MPI_ECP_MUL( &T,  &S,     &S      );
+            MPI_ECP_MUL( &S,  &T,     &grp->A );
+            MPI_ECP_ADD( &M,  &M,     &S      );
         }
     }
 
     /* S = 4.X.Y^2 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T,  &P->Y,  &P->Y   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &T,  1               ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &P->X,  &T      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &S,  1               ) );
+    MPI_ECP_MUL(     &T,  &P->Y,  &P->Y   );
+    MPI_ECP_SHIFT_L( &T,  1               );
+    MPI_ECP_MUL(     &S,  &P->X,  &T      );
+    MPI_ECP_SHIFT_L( &S,  1               );
 
     /* U = 8.Y^4 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &U,  &T,     &T      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &U,  1               ) );
+    MPI_ECP_MUL(     &U,  &T,     &T      );
+    MPI_ECP_SHIFT_L( &U,  1               );
 
     /* T = M^2 - 2.S */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T,  &M,     &M      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T,  &T,     &S      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T,  &T,     &S      ) );
+    MPI_ECP_MUL( &T,  &M,     &M      );
+    MPI_ECP_SUB( &T,  &T,     &S      );
+    MPI_ECP_SUB( &T,  &T,     &S      );
 
     /* S = M(S - T) - U */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &S,  &S,     &T      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S,  &S,     &M      ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &S,  &S,     &U      ) );
+    MPI_ECP_SUB( &S,  &S,     &T      );
+    MPI_ECP_MUL( &S,  &S,     &M      );
+    MPI_ECP_SUB( &S,  &S,     &U      );
 
     /* U = 2.Y.Z */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &U,  &P->Y,  &P->Z   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &U,  1               ) );
+    MPI_ECP_MUL(     &U,  &P->Y,  &P->Z   );
+    MPI_ECP_SHIFT_L( &U,  1               );
 
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->X, &T ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Y, &S ) );
@@ -1494,12 +1529,12 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 
     mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 ); mbedtls_mpi_init( &T3 ); mbedtls_mpi_init( &T4 );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T1,  &P->Z,  &P->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T2,  &T1,    &P->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T1,  &T1,    &Q->X ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T2,  &T2,    &Q->Y ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T1,  &T1,    &P->X ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T2,  &T2,    &P->Y ) );
+    MPI_ECP_MUL( &T1,  &P->Z,  &P->Z );
+    MPI_ECP_MUL( &T2,  &T1,    &P->Z );
+    MPI_ECP_MUL( &T1,  &T1,    &Q->X );
+    MPI_ECP_MUL( &T2,  &T2,    &Q->Y );
+    MPI_ECP_SUB( &T1,  &T1,    &P->X );
+    MPI_ECP_SUB( &T2,  &T2,    &P->Y );
 
     /* Special cases (2) and (3) */
     if( mbedtls_mpi_cmp_int( &T1, 0 ) == 0 )
@@ -1516,19 +1551,21 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         }
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, Z,    &P->Z,  &T1   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T1,    &T1   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T4,  &T3,    &T1   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T3,    &P->X ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T1, &T3 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, &T1,  1     ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X,    &T2,    &T2   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X,    X,      &T1   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X,    X,      &T4   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &T3,  &T3,    X     ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T3,  &T3,    &T2   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &T4,  &T4,    &P->Y ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, Y,    &T3,    &T4   ) );
+    MPI_ECP_MUL( Z,    &P->Z,  &T1   );
+    MPI_ECP_MUL( &T3,  &T1,    &T1   );
+    MPI_ECP_MUL( &T4,  &T3,    &T1   );
+    MPI_ECP_MUL( &T3,  &T3,    &P->X );
+
+    MPI_ECP_MOV( &T1, &T3 );
+    MPI_ECP_SHIFT_L( &T1, 1 );
+
+    MPI_ECP_MUL( X,    &T2,    &T2   );
+    MPI_ECP_SUB( X,    X,      &T1   );
+    MPI_ECP_SUB( X,    X,      &T4   );
+    MPI_ECP_SUB( &T3,  &T3,    X     );
+    MPI_ECP_MUL( &T3,  &T3,    &T2   );
+    MPI_ECP_MUL( &T4,  &T4,    &P->Y );
+    MPI_ECP_SUB( Y,    &T3,    &T4   );
 
 cleanup:
 
@@ -1565,15 +1602,15 @@ static int ecp_randomize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     MBEDTLS_MPI_CHK( mbedtls_mpi_random( &l, 2, &grp->P, f_rng, p_rng ) );
 
     /* Z = l * Z */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Z,   &pt->Z,     &l  ) );
+    MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l  );
 
     /* X = l^2 * X */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &ll,      &l,         &l  ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->X,   &pt->X,     &ll ) );
+    MPI_ECP_MUL( &ll,      &l,         &l  );
+    MPI_ECP_MUL( &pt->X,   &pt->X,     &ll );
 
     /* Y = l^3 * Y */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &ll,      &ll,        &l  ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &pt->Y,   &pt->Y,     &ll ) );
+    MPI_ECP_MUL( &ll,      &ll,        &l  );
+    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &ll );
 
 cleanup:
     mbedtls_mpi_free( &l ); mbedtls_mpi_free( &ll );
@@ -2263,8 +2300,8 @@ static int ecp_normalize_mxz( const mbedtls_ecp_group *grp, mbedtls_ecp_point *P
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &P->Z, &P->Z, &grp->P ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &P->X, &P->X, &P->Z ) );
+    MPI_ECP_INV( &P->Z, &P->Z );
+    MPI_ECP_MUL( &P->X, &P->X, &P->Z );
     MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &P->Z, 1 ) );
 
 cleanup:
@@ -2298,8 +2335,8 @@ static int ecp_randomize_mxz( const mbedtls_ecp_group *grp, mbedtls_ecp_point *P
     /* Generate l such that 1 < l < p */
     MBEDTLS_MPI_CHK( mbedtls_mpi_random( &l, 2, &grp->P, f_rng, p_rng ) );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &P->X, &P->X, &l ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &P->Z, &P->Z, &l ) );
+    MPI_ECP_MUL( &P->X, &P->X, &l );
+    MPI_ECP_MUL( &P->Z, &P->Z, &l );
 
 cleanup:
     mbedtls_mpi_free( &l );
@@ -2345,24 +2382,24 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
     mbedtls_mpi_init( &BB ); mbedtls_mpi_init( &E ); mbedtls_mpi_init( &C );
     mbedtls_mpi_init( &D ); mbedtls_mpi_init( &DA ); mbedtls_mpi_init( &CB );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &A,    &P->X,   &P->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &AA,   &A,      &A    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &B,    &P->X,   &P->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &BB,   &B,      &B    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &E,    &AA,     &BB   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &C,    &Q->X,   &Q->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &D,    &Q->X,   &Q->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &DA,   &D,      &A    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &CB,   &C,      &B    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &S->X, &DA,     &CB   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S->X, &S->X,   &S->X ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, &S->Z, &DA,     &CB   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S->Z, &S->Z,   &S->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &S->Z, d,       &S->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &R->X, &AA,     &BB   ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &R->Z, &grp->A, &E    ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &R->Z, &BB,     &R->Z ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &R->Z, &E,      &R->Z ) );
+    MPI_ECP_ADD( &A,    &P->X,   &P->Z );
+    MPI_ECP_MUL( &AA,   &A,      &A    );
+    MPI_ECP_SUB( &B,    &P->X,   &P->Z );
+    MPI_ECP_MUL( &BB,   &B,      &B    );
+    MPI_ECP_SUB( &E,    &AA,     &BB   );
+    MPI_ECP_ADD( &C,    &Q->X,   &Q->Z );
+    MPI_ECP_SUB( &D,    &Q->X,   &Q->Z );
+    MPI_ECP_MUL( &DA,   &D,      &A    );
+    MPI_ECP_MUL( &CB,   &C,      &B    );
+    MPI_ECP_ADD( &S->X, &DA,     &CB   );
+    MPI_ECP_MUL( &S->X, &S->X,   &S->X );
+    MPI_ECP_SUB( &S->Z, &DA,     &CB   );
+    MPI_ECP_MUL( &S->Z, &S->Z,   &S->Z );
+    MPI_ECP_MUL( &S->Z, d,       &S->Z );
+    MPI_ECP_MUL( &R->X, &AA,     &BB   );
+    MPI_ECP_MUL( &R->Z, &grp->A, &E    );
+    MPI_ECP_ADD( &R->Z, &BB,     &R->Z );
+    MPI_ECP_MUL( &R->Z, &E,      &R->Z );
 
 cleanup:
     mbedtls_mpi_free( &A ); mbedtls_mpi_free( &AA ); mbedtls_mpi_free( &B );
@@ -2402,7 +2439,7 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     mbedtls_mpi_free( &R->Y );
 
     /* RP.X might be sligtly larger than P, so reduce it */
-    MOD_ADD( RP.X );
+    MOD_ADD( &RP.X );
 
     /* Randomize coordinates of the starting point */
     MBEDTLS_MPI_CHK( ecp_randomize_mxz( grp, &RP, f_rng, p_rng ) );
@@ -2571,21 +2608,21 @@ static int ecp_check_pubkey_sw( const mbedtls_ecp_group *grp, const mbedtls_ecp_
      * YY = Y^2
      * RHS = X (X^2 + A) + B = X^3 + A X + B
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &YY,  &pt->Y,   &pt->Y  ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &RHS, &pt->X,   &pt->X  ) );
+    MPI_ECP_MUL( &YY,  &pt->Y, &pt->Y  );
+    MPI_ECP_MUL( &RHS, &pt->X, &pt->X  );
 
     /* Special case for A = -3 */
     if( grp->A.p == NULL )
     {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &RHS, &RHS, 3       ) );  MOD_SUB( RHS );
+        MPI_ECP_SUB_INT( &RHS, &RHS, 3 );
     }
     else
     {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &RHS, &RHS, &grp->A ) );
+        MPI_ECP_ADD( &RHS, &RHS, &grp->A );
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, &RHS, &RHS,     &pt->X  ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, &RHS, &RHS,     &grp->B ) );
+    MPI_ECP_MUL( &RHS, &RHS, &pt->X  );
+    MPI_ECP_ADD( &RHS, &RHS, &grp->B );
 
     if( mbedtls_mpi_cmp_mpi( &YY, &RHS ) != 0 )
         ret = MBEDTLS_ERR_ECP_INVALID_KEY;

From 838b715fcc3de6fedc6fd6f501d911bfc9b1aa00 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 05:01:53 +0000
Subject: [PATCH 05/21] Add comment on input/output aliasing in ecp_add_mixed()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/library/ecp.c b/library/ecp.c
index dcfe949839..31f2e7fa50 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1508,6 +1508,10 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     mbedtls_mpi T1, T2, T3, T4;
+
+    /* NOTE: Aliasing between input and output is allowed, so one has to make
+     *       sure that at the point X,Y,Z are written, {P,Q}->{X,Y,Z} are no
+     *       longer read from. */
     mbedtls_mpi * const X = &R->X;
     mbedtls_mpi * const Y = &R->Y;
     mbedtls_mpi * const Z = &R->Z;
@@ -1551,6 +1555,7 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         }
     }
 
+    /* {P,Q}->Z no longer used, so OK to write to Z even if there's aliasing. */
     MPI_ECP_MUL( Z,    &P->Z,  &T1   );
     MPI_ECP_MUL( &T3,  &T1,    &T1   );
     MPI_ECP_MUL( &T4,  &T3,    &T1   );
@@ -1559,12 +1564,14 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     MPI_ECP_MOV( &T1, &T3 );
     MPI_ECP_SHIFT_L( &T1, 1 );
 
+    /* {P,Q}->X no longer used, so OK to write to X even if there's aliasing. */
     MPI_ECP_MUL( X,    &T2,    &T2   );
     MPI_ECP_SUB( X,    X,      &T1   );
     MPI_ECP_SUB( X,    X,      &T4   );
     MPI_ECP_SUB( &T3,  &T3,    X     );
     MPI_ECP_MUL( &T3,  &T3,    &T2   );
     MPI_ECP_MUL( &T4,  &T4,    &P->Y );
+    /* {P,Q}->Y no longer used, so OK to write to Y even if there's aliasing. */
     MPI_ECP_SUB( Y,    &T3,    &T4   );
 
 cleanup:

From 02a999b91a02144ba34ca9352d297408d37c67e8 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 05:18:57 +0000
Subject: [PATCH 06/21] Remove local MPI from ecp_normalize_jac_many()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 31f2e7fa50..c441e10cb3 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1279,7 +1279,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i;
-    mbedtls_mpi *c, u, Zi, ZZi;
+    mbedtls_mpi *c, u, Zi;
 
     if( ( c = mbedtls_calloc( T_size, sizeof( mbedtls_mpi ) ) ) == NULL )
         return( MBEDTLS_ERR_ECP_ALLOC_FAILED );
@@ -1287,7 +1287,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     for( i = 0; i < T_size; i++ )
         mbedtls_mpi_init( &c[i] );
 
-    mbedtls_mpi_init( &u ); mbedtls_mpi_init( &Zi ); mbedtls_mpi_init( &ZZi );
+    mbedtls_mpi_init( &u ); mbedtls_mpi_init( &Zi );
 
     /*
      * c[i] = Z_0 * ... * Z_i
@@ -1322,10 +1322,10 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
         /*
          * proceed as in normalize()
          */
-        MPI_ECP_MUL( &ZZi,     &Zi,      &Zi  );
-        MPI_ECP_MUL( &T[i]->X, &T[i]->X, &ZZi );
-        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &ZZi );
-        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi  );
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi );
+        MPI_ECP_MUL( &Zi,      &Zi,      &Zi );
+        MPI_ECP_MUL( &T[i]->X, &T[i]->X, &Zi );
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi );
 
         /*
          * Post-precessing: reclaim some memory by shrinking coordinates
@@ -1343,7 +1343,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 
 cleanup:
 
-    mbedtls_mpi_free( &u ); mbedtls_mpi_free( &Zi ); mbedtls_mpi_free( &ZZi );
+    mbedtls_mpi_free( &u ); mbedtls_mpi_free( &Zi );
     for( i = 0; i < T_size; i++ )
         mbedtls_mpi_free( &c[i] );
     mbedtls_free( c );

From b8442cd9c6323a6a8255eecc94617becee5056ae Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 06:32:11 +0000
Subject: [PATCH 07/21] Remove another local MPI from ecp_normalize_jac_many()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index c441e10cb3..783426571e 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1279,7 +1279,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i;
-    mbedtls_mpi *c, u, Zi;
+    mbedtls_mpi *c, t;
 
     if( ( c = mbedtls_calloc( T_size, sizeof( mbedtls_mpi ) ) ) == NULL )
         return( MBEDTLS_ERR_ECP_ALLOC_FAILED );
@@ -1287,7 +1287,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     for( i = 0; i < T_size; i++ )
         mbedtls_mpi_init( &c[i] );
 
-    mbedtls_mpi_init( &u ); mbedtls_mpi_init( &Zi );
+    mbedtls_mpi_init( &t );
 
     /*
      * c[i] = Z_0 * ... * Z_i
@@ -1299,33 +1299,40 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     }
 
     /*
-     * u = 1 / (Z_0 * ... * Z_n) mod P
+     * c[n] = 1 / (Z_0 * ... * Z_n) mod P
      */
-    MPI_ECP_INV( &u, &c[T_size-1] );
+    MPI_ECP_INV( &c[T_size-1], &c[T_size-1] );
 
     for( i = T_size - 1; ; i-- )
     {
-        /*
-         * Zi = 1 / Z_i mod p
-         * u = 1 / (Z_0 * ... * Z_i) mod P
+        /* At the start of iteration i (note that i decrements), we have
+         * - c[j] = Z_0 * .... * Z_j        for j  < i,
+         * - c[j] = 1 / (Z_0 * .... * Z_j)  for j == i,
+         *
+         * This is maintained via
+         * - c[i-1] <- c[i] * Z_i
+         *
+         * We also derive 1/Z_i = c[i] * c[i-1] for i>0 and use that
+         * to do the actual normalization. For i==0, we already have
+         * c[0] = 1 / Z_0.
          */
-        if( i == 0 )
+
+        if( i > 0 )
         {
-            MPI_ECP_MOV( &Zi, &u );
+            /* Compute 1/Z_i and establish invariant for the next iteration. */
+            MPI_ECP_MUL( &t,      &c[i], &c[i-1]  );
+            MPI_ECP_MUL( &c[i-1], &c[i], &T[i]->Z );
         }
         else
         {
-            MPI_ECP_MUL( &Zi, &u, &c[i-1]  );
-            MPI_ECP_MUL( &u,  &u, &T[i]->Z );
+            MPI_ECP_MOV( &t, &c[0] );
         }
 
-        /*
-         * proceed as in normalize()
-         */
-        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi );
-        MPI_ECP_MUL( &Zi,      &Zi,      &Zi );
-        MPI_ECP_MUL( &T[i]->X, &T[i]->X, &Zi );
-        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &Zi );
+        /* Now t holds 1 / Z_i; normalize as in ecp_normalize_jac() */
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &t );
+        MPI_ECP_MUL( &t,       &t,       &t );
+        MPI_ECP_MUL( &T[i]->X, &T[i]->X, &t );
+        MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &t );
 
         /*
          * Post-precessing: reclaim some memory by shrinking coordinates
@@ -1343,7 +1350,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 
 cleanup:
 
-    mbedtls_mpi_free( &u ); mbedtls_mpi_free( &Zi );
+    mbedtls_mpi_free( &t );
     for( i = 0; i < T_size; i++ )
         mbedtls_mpi_free( &c[i] );
     mbedtls_free( c );

From 885ed403c954eebe5c16abec2c6bc99091073193 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 06:43:50 +0000
Subject: [PATCH 08/21] Introduce wrapper for modular squaring

This paves the way for dedicated squaring implementations.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 55 +++++++++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 783426571e..d85b00782f 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1194,6 +1194,9 @@ cleanup:
 #define MPI_ECP_MUL( X, A, B )                                      \
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X, A, B ) )
 
+#define MPI_ECP_SQR( X, A )                                         \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X, A, A ) )
+
 #define MPI_ECP_MUL_INT( X, A, c )                                  \
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, X, A, c ) )
 
@@ -1238,7 +1241,7 @@ static int ecp_normalize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
 
     MPI_ECP_INV( &T,       &pt->Z );          /* T   <-          1 / Z   */
     MPI_ECP_MUL( &pt->Y,   &pt->Y,     &T );  /* Y'  <- Y*T    = Y / Z   */
-    MPI_ECP_MUL( &T,       &T,         &T );  /* T   <- T^2    = 1 / Z^2 */
+    MPI_ECP_SQR( &T,       &T             );  /* T   <- T^2    = 1 / Z^2 */
     MPI_ECP_MUL( &pt->X,   &pt->X,     &T );  /* X   <- X  * T = X / Z^2 */
     MPI_ECP_MUL( &pt->Y,   &pt->Y,     &T );  /* Y'' <- Y' * T = Y / Z^3 */
 
@@ -1330,7 +1333,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 
         /* Now t holds 1 / Z_i; normalize as in ecp_normalize_jac() */
         MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &t );
-        MPI_ECP_MUL( &t,       &t,       &t );
+        MPI_ECP_SQR( &t,       &t           );
         MPI_ECP_MUL( &T[i]->X, &T[i]->X, &t );
         MPI_ECP_MUL( &T[i]->Y, &T[i]->Y, &t );
 
@@ -1422,7 +1425,7 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     if( grp->A.p == NULL )
     {
         /* M = 3(X + Z^2)(X - Z^2) */
-        MPI_ECP_MUL(     &S,  &P->Z,  &P->Z   );
+        MPI_ECP_SQR(     &S,  &P->Z           );
         MPI_ECP_ADD(     &T,  &P->X,  &S      );
         MPI_ECP_SUB(     &U,  &P->X,  &S      );
         MPI_ECP_MUL(     &S,  &T,     &U      );
@@ -1431,34 +1434,34 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     else
     {
         /* M = 3.X^2 */
-        MPI_ECP_MUL(     &S,  &P->X,  &P->X   );
-        MPI_ECP_MUL_INT( &M,  &S,     3       );
+        MPI_ECP_SQR(     &S,  &P->X  );
+        MPI_ECP_MUL_INT( &M,  &S,  3 );
 
         /* Optimize away for "koblitz" curves with A = 0 */
         if( mbedtls_mpi_cmp_int( &grp->A, 0 ) != 0 )
         {
             /* M += A.Z^4 */
-            MPI_ECP_MUL( &S,  &P->Z,  &P->Z   );
-            MPI_ECP_MUL( &T,  &S,     &S      );
+            MPI_ECP_SQR( &S,  &P->Z           );
+            MPI_ECP_SQR( &T,  &S              );
             MPI_ECP_MUL( &S,  &T,     &grp->A );
             MPI_ECP_ADD( &M,  &M,     &S      );
         }
     }
 
     /* S = 4.X.Y^2 */
-    MPI_ECP_MUL(     &T,  &P->Y,  &P->Y   );
-    MPI_ECP_SHIFT_L( &T,  1               );
-    MPI_ECP_MUL(     &S,  &P->X,  &T      );
-    MPI_ECP_SHIFT_L( &S,  1               );
+    MPI_ECP_SQR(     &T,  &P->Y     );
+    MPI_ECP_SHIFT_L( &T,  1         );
+    MPI_ECP_MUL(     &S,  &P->X, &T );
+    MPI_ECP_SHIFT_L( &S,  1         );
 
     /* U = 8.Y^4 */
-    MPI_ECP_MUL(     &U,  &T,     &T      );
-    MPI_ECP_SHIFT_L( &U,  1               );
+    MPI_ECP_SQR(     &U,  &T );
+    MPI_ECP_SHIFT_L( &U,  1  );
 
     /* T = M^2 - 2.S */
-    MPI_ECP_MUL( &T,  &M,     &M      );
-    MPI_ECP_SUB( &T,  &T,     &S      );
-    MPI_ECP_SUB( &T,  &T,     &S      );
+    MPI_ECP_SQR( &T,  &M     );
+    MPI_ECP_SUB( &T,  &T, &S );
+    MPI_ECP_SUB( &T,  &T, &S );
 
     /* S = M(S - T) - U */
     MPI_ECP_SUB( &S,  &S,     &T      );
@@ -1540,7 +1543,7 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 
     mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 ); mbedtls_mpi_init( &T3 ); mbedtls_mpi_init( &T4 );
 
-    MPI_ECP_MUL( &T1,  &P->Z,  &P->Z );
+    MPI_ECP_SQR( &T1,  &P->Z         );
     MPI_ECP_MUL( &T2,  &T1,    &P->Z );
     MPI_ECP_MUL( &T1,  &T1,    &Q->X );
     MPI_ECP_MUL( &T2,  &T2,    &Q->Y );
@@ -1564,7 +1567,7 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 
     /* {P,Q}->Z no longer used, so OK to write to Z even if there's aliasing. */
     MPI_ECP_MUL( Z,    &P->Z,  &T1   );
-    MPI_ECP_MUL( &T3,  &T1,    &T1   );
+    MPI_ECP_SQR( &T3,  &T1           );
     MPI_ECP_MUL( &T4,  &T3,    &T1   );
     MPI_ECP_MUL( &T3,  &T3,    &P->X );
 
@@ -1572,7 +1575,7 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     MPI_ECP_SHIFT_L( &T1, 1 );
 
     /* {P,Q}->X no longer used, so OK to write to X even if there's aliasing. */
-    MPI_ECP_MUL( X,    &T2,    &T2   );
+    MPI_ECP_SQR( X,    &T2           );
     MPI_ECP_SUB( X,    X,      &T1   );
     MPI_ECP_SUB( X,    X,      &T4   );
     MPI_ECP_SUB( &T3,  &T3,    X     );
@@ -1619,7 +1622,7 @@ static int ecp_randomize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l  );
 
     /* X = l^2 * X */
-    MPI_ECP_MUL( &ll,      &l,         &l  );
+    MPI_ECP_SQR( &ll,      &l              );
     MPI_ECP_MUL( &pt->X,   &pt->X,     &ll );
 
     /* Y = l^3 * Y */
@@ -2397,18 +2400,18 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
     mbedtls_mpi_init( &D ); mbedtls_mpi_init( &DA ); mbedtls_mpi_init( &CB );
 
     MPI_ECP_ADD( &A,    &P->X,   &P->Z );
-    MPI_ECP_MUL( &AA,   &A,      &A    );
+    MPI_ECP_SQR( &AA,   &A             );
     MPI_ECP_SUB( &B,    &P->X,   &P->Z );
-    MPI_ECP_MUL( &BB,   &B,      &B    );
+    MPI_ECP_SQR( &BB,   &B             );
     MPI_ECP_SUB( &E,    &AA,     &BB   );
     MPI_ECP_ADD( &C,    &Q->X,   &Q->Z );
     MPI_ECP_SUB( &D,    &Q->X,   &Q->Z );
     MPI_ECP_MUL( &DA,   &D,      &A    );
     MPI_ECP_MUL( &CB,   &C,      &B    );
     MPI_ECP_ADD( &S->X, &DA,     &CB   );
-    MPI_ECP_MUL( &S->X, &S->X,   &S->X );
+    MPI_ECP_SQR( &S->X, &S->X          );
     MPI_ECP_SUB( &S->Z, &DA,     &CB   );
-    MPI_ECP_MUL( &S->Z, &S->Z,   &S->Z );
+    MPI_ECP_SQR( &S->Z, &S->Z          );
     MPI_ECP_MUL( &S->Z, d,       &S->Z );
     MPI_ECP_MUL( &R->X, &AA,     &BB   );
     MPI_ECP_MUL( &R->Z, &grp->A, &E    );
@@ -2622,8 +2625,8 @@ static int ecp_check_pubkey_sw( const mbedtls_ecp_group *grp, const mbedtls_ecp_
      * YY = Y^2
      * RHS = X (X^2 + A) + B = X^3 + A X + B
      */
-    MPI_ECP_MUL( &YY,  &pt->Y, &pt->Y  );
-    MPI_ECP_MUL( &RHS, &pt->X, &pt->X  );
+    MPI_ECP_SQR( &YY,  &pt->Y );
+    MPI_ECP_SQR( &RHS, &pt->X );
 
     /* Special case for A = -3 */
     if( grp->A.p == NULL )

From 0d629791e9143056d9b2c2eca793a02256054211 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 06:45:49 +0000
Subject: [PATCH 09/21] Remove local MPI from ecp_randomize_jac()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index d85b00782f..a0b5914eee 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1611,26 +1611,28 @@ static int ecp_randomize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi l, ll;
+    mbedtls_mpi l;
 
-    mbedtls_mpi_init( &l ); mbedtls_mpi_init( &ll );
+    mbedtls_mpi_init( &l );
 
     /* Generate l such that 1 < l < p */
     MBEDTLS_MPI_CHK( mbedtls_mpi_random( &l, 2, &grp->P, f_rng, p_rng ) );
 
     /* Z = l * Z */
-    MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l  );
+    MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l );
+
+    /* Y = l * Y */
+    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &l );
 
     /* X = l^2 * X */
-    MPI_ECP_SQR( &ll,      &l              );
-    MPI_ECP_MUL( &pt->X,   &pt->X,     &ll );
+    MPI_ECP_SQR( &l,       &l             );
+    MPI_ECP_MUL( &pt->X,   &pt->X,     &l );
 
     /* Y = l^3 * Y */
-    MPI_ECP_MUL( &ll,      &ll,        &l  );
-    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &ll );
+    MPI_ECP_MUL( &pt->Y,   &pt->Y,     &l );
 
 cleanup:
-    mbedtls_mpi_free( &l ); mbedtls_mpi_free( &ll );
+    mbedtls_mpi_free( &l );
 
     if( ret == MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
         ret = MBEDTLS_ERR_ECP_RANDOM_FAILED;

From 376dc89519fe1eb65038225bd6a9bf330e1e6bef Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 07:14:07 +0000
Subject: [PATCH 10/21] Reorder ops in ecp_double_add_mxz() to indicate
 redundant local MPIs

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index a0b5914eee..cdffa1ca96 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -2402,22 +2402,22 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
     mbedtls_mpi_init( &D ); mbedtls_mpi_init( &DA ); mbedtls_mpi_init( &CB );
 
     MPI_ECP_ADD( &A,    &P->X,   &P->Z );
-    MPI_ECP_SQR( &AA,   &A             );
     MPI_ECP_SUB( &B,    &P->X,   &P->Z );
-    MPI_ECP_SQR( &BB,   &B             );
-    MPI_ECP_SUB( &E,    &AA,     &BB   );
     MPI_ECP_ADD( &C,    &Q->X,   &Q->Z );
     MPI_ECP_SUB( &D,    &Q->X,   &Q->Z );
-    MPI_ECP_MUL( &DA,   &D,      &A    );
-    MPI_ECP_MUL( &CB,   &C,      &B    );
+    MPI_ECP_MUL( &DA,   &D,      &A    ); /* D no longer needed */
+    MPI_ECP_MUL( &CB,   &C,      &B    ); /* C no longer needed */
+    MPI_ECP_SQR( &AA,   &A             ); /* A no longer needed */
+    MPI_ECP_SQR( &BB,   &B             ); /* B no longer needed */
+    MPI_ECP_MUL( &R->X, &AA,     &BB   );
+    MPI_ECP_SUB( &E,    &AA,     &BB   ); /* AA no longer needed */
+    MPI_ECP_MUL( &R->Z, &grp->A, &E    );
+    MPI_ECP_ADD( &R->Z, &BB,     &R->Z ); /* BB no longer needed */
     MPI_ECP_ADD( &S->X, &DA,     &CB   );
     MPI_ECP_SQR( &S->X, &S->X          );
     MPI_ECP_SUB( &S->Z, &DA,     &CB   );
     MPI_ECP_SQR( &S->Z, &S->Z          );
     MPI_ECP_MUL( &S->Z, d,       &S->Z );
-    MPI_ECP_MUL( &R->X, &AA,     &BB   );
-    MPI_ECP_MUL( &R->Z, &grp->A, &E    );
-    MPI_ECP_ADD( &R->Z, &BB,     &R->Z );
     MPI_ECP_MUL( &R->Z, &E,      &R->Z );
 
 cleanup:

From 28ccb1cc90101bed8befcdfbdb961b5413975a2e Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 07:15:04 +0000
Subject: [PATCH 11/21] Reduce number of local MPIs from 9 to 4 in
 ecp_double_add_mxz()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 48 ++++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index cdffa1ca96..d63c571cdd 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -2395,35 +2395,39 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi A, AA, B, BB, E, C, D, DA, CB;
 
-    mbedtls_mpi_init( &A ); mbedtls_mpi_init( &AA ); mbedtls_mpi_init( &B );
-    mbedtls_mpi_init( &BB ); mbedtls_mpi_init( &E ); mbedtls_mpi_init( &C );
-    mbedtls_mpi_init( &D ); mbedtls_mpi_init( &DA ); mbedtls_mpi_init( &CB );
+    mbedtls_mpi T0,T1,T2,T3;
 
-    MPI_ECP_ADD( &A,    &P->X,   &P->Z );
-    MPI_ECP_SUB( &B,    &P->X,   &P->Z );
-    MPI_ECP_ADD( &C,    &Q->X,   &Q->Z );
-    MPI_ECP_SUB( &D,    &Q->X,   &Q->Z );
-    MPI_ECP_MUL( &DA,   &D,      &A    ); /* D no longer needed */
-    MPI_ECP_MUL( &CB,   &C,      &B    ); /* C no longer needed */
-    MPI_ECP_SQR( &AA,   &A             ); /* A no longer needed */
-    MPI_ECP_SQR( &BB,   &B             ); /* B no longer needed */
-    MPI_ECP_MUL( &R->X, &AA,     &BB   );
-    MPI_ECP_SUB( &E,    &AA,     &BB   ); /* AA no longer needed */
-    MPI_ECP_MUL( &R->Z, &grp->A, &E    );
-    MPI_ECP_ADD( &R->Z, &BB,     &R->Z ); /* BB no longer needed */
-    MPI_ECP_ADD( &S->X, &DA,     &CB   );
+    mbedtls_mpi_init( &T0 );
+    mbedtls_mpi_init( &T1 );
+    mbedtls_mpi_init( &T2 );
+    mbedtls_mpi_init( &T3 );
+
+    MPI_ECP_ADD( &T0,   &P->X,   &P->Z );
+    MPI_ECP_SUB( &T1,   &P->X,   &P->Z );
+    MPI_ECP_ADD( &T2,   &Q->X,   &Q->Z );
+    MPI_ECP_SUB( &T3,   &Q->X,   &Q->Z );
+    MPI_ECP_MUL( &T3,   &T3,     &T0   );
+    MPI_ECP_MUL( &T2,   &T2,     &T1   );
+    MPI_ECP_SQR( &T0,   &T0            );
+    MPI_ECP_SQR( &T1,   &T1            );
+    MPI_ECP_MUL( &R->X, &T0,     &T1   );
+    MPI_ECP_SUB( &T0,   &T0,     &T1   );
+    MPI_ECP_MUL( &R->Z, &grp->A, &T0   );
+    MPI_ECP_ADD( &R->Z, &T1,     &R->Z );
+    MPI_ECP_ADD( &S->X, &T3,     &T2   );
     MPI_ECP_SQR( &S->X, &S->X          );
-    MPI_ECP_SUB( &S->Z, &DA,     &CB   );
+    MPI_ECP_SUB( &S->Z, &T3,     &T2   );
     MPI_ECP_SQR( &S->Z, &S->Z          );
     MPI_ECP_MUL( &S->Z, d,       &S->Z );
-    MPI_ECP_MUL( &R->Z, &E,      &R->Z );
+    MPI_ECP_MUL( &R->Z, &T0,     &R->Z );
 
 cleanup:
-    mbedtls_mpi_free( &A ); mbedtls_mpi_free( &AA ); mbedtls_mpi_free( &B );
-    mbedtls_mpi_free( &BB ); mbedtls_mpi_free( &E ); mbedtls_mpi_free( &C );
-    mbedtls_mpi_free( &D ); mbedtls_mpi_free( &DA ); mbedtls_mpi_free( &CB );
+
+    mbedtls_mpi_free( &T0 );
+    mbedtls_mpi_free( &T1 );
+    mbedtls_mpi_free( &T2 );
+    mbedtls_mpi_free( &T3 );
 
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_DOUBLE_ADD_MXZ_ALT) */

From a7f8edd709e7df569b22ce5f5a3648669e577699 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 07:29:46 +0000
Subject: [PATCH 12/21] Keep temporaries across iterated invocations of
 ecp_double_jac()

This reduces the number of heap operations.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 146 +++++++++++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 60 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index d63c571cdd..46b54504b4 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1402,7 +1402,8 @@ cleanup:
  *             3M + 6S + 1a     otherwise
  */
 static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
-                           const mbedtls_ecp_point *P )
+                           const mbedtls_ecp_point *P,
+                           mbedtls_mpi tmp[4] )
 {
 #if defined(MBEDTLS_SELF_TEST)
     dbl_count++;
@@ -1417,67 +1418,63 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi M, S, T, U;
-
-    mbedtls_mpi_init( &M ); mbedtls_mpi_init( &S ); mbedtls_mpi_init( &T ); mbedtls_mpi_init( &U );
 
     /* Special case for A = -3 */
     if( grp->A.p == NULL )
     {
         /* M = 3(X + Z^2)(X - Z^2) */
-        MPI_ECP_SQR(     &S,  &P->Z           );
-        MPI_ECP_ADD(     &T,  &P->X,  &S      );
-        MPI_ECP_SUB(     &U,  &P->X,  &S      );
-        MPI_ECP_MUL(     &S,  &T,     &U      );
-        MPI_ECP_MUL_INT( &M,  &S,     3       );
+        MPI_ECP_SQR(     &tmp[1],  &P->Z                );
+        MPI_ECP_ADD(     &tmp[2],  &P->X,  &tmp[1]      );
+        MPI_ECP_SUB(     &tmp[3],  &P->X,  &tmp[1]      );
+        MPI_ECP_MUL(     &tmp[1],  &tmp[2],     &tmp[3] );
+        MPI_ECP_MUL_INT( &tmp[0],  &tmp[1],     3       );
     }
     else
     {
         /* M = 3.X^2 */
-        MPI_ECP_SQR(     &S,  &P->X  );
-        MPI_ECP_MUL_INT( &M,  &S,  3 );
+        MPI_ECP_SQR(     &tmp[1],  &P->X  );
+        MPI_ECP_MUL_INT( &tmp[0],  &tmp[1],  3 );
 
         /* Optimize away for "koblitz" curves with A = 0 */
         if( mbedtls_mpi_cmp_int( &grp->A, 0 ) != 0 )
         {
             /* M += A.Z^4 */
-            MPI_ECP_SQR( &S,  &P->Z           );
-            MPI_ECP_SQR( &T,  &S              );
-            MPI_ECP_MUL( &S,  &T,     &grp->A );
-            MPI_ECP_ADD( &M,  &M,     &S      );
+            MPI_ECP_SQR( &tmp[1],  &P->Z                );
+            MPI_ECP_SQR( &tmp[2],  &tmp[1]              );
+            MPI_ECP_MUL( &tmp[1],  &tmp[2],     &grp->A );
+            MPI_ECP_ADD( &tmp[0],  &tmp[0],     &tmp[1] );
         }
     }
 
     /* S = 4.X.Y^2 */
-    MPI_ECP_SQR(     &T,  &P->Y     );
-    MPI_ECP_SHIFT_L( &T,  1         );
-    MPI_ECP_MUL(     &S,  &P->X, &T );
-    MPI_ECP_SHIFT_L( &S,  1         );
+    MPI_ECP_SQR(     &tmp[2],  &P->Y     );
+    MPI_ECP_SHIFT_L( &tmp[2],  1         );
+    MPI_ECP_MUL(     &tmp[1],  &P->X, &tmp[2] );
+    MPI_ECP_SHIFT_L( &tmp[1],  1         );
 
     /* U = 8.Y^4 */
-    MPI_ECP_SQR(     &U,  &T );
-    MPI_ECP_SHIFT_L( &U,  1  );
+    MPI_ECP_SQR(     &tmp[3],  &tmp[2] );
+    MPI_ECP_SHIFT_L( &tmp[3],  1       );
 
     /* T = M^2 - 2.S */
-    MPI_ECP_SQR( &T,  &M     );
-    MPI_ECP_SUB( &T,  &T, &S );
-    MPI_ECP_SUB( &T,  &T, &S );
+    MPI_ECP_SQR( &tmp[2],  &tmp[0]     );
+    MPI_ECP_SUB( &tmp[2],  &tmp[2], &tmp[1] );
+    MPI_ECP_SUB( &tmp[2],  &tmp[2], &tmp[1] );
 
     /* S = M(S - T) - U */
-    MPI_ECP_SUB( &S,  &S,     &T      );
-    MPI_ECP_MUL( &S,  &S,     &M      );
-    MPI_ECP_SUB( &S,  &S,     &U      );
+    MPI_ECP_SUB( &tmp[1],  &tmp[1],     &tmp[2] );
+    MPI_ECP_MUL( &tmp[1],  &tmp[1],     &tmp[0] );
+    MPI_ECP_SUB( &tmp[1],  &tmp[1],     &tmp[3] );
 
     /* U = 2.Y.Z */
-    MPI_ECP_MUL(     &U,  &P->Y,  &P->Z   );
-    MPI_ECP_SHIFT_L( &U,  1               );
+    MPI_ECP_MUL(     &tmp[3],  &P->Y,  &P->Z   );
+    MPI_ECP_SHIFT_L( &tmp[3],  1               );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->X, &T ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Y, &S ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Z, &U ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->X, &tmp[2] ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Y, &tmp[1] ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Z, &tmp[3] ) );
 
 cleanup:
-    mbedtls_mpi_free( &M ); mbedtls_mpi_free( &S ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &U );
 
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_DOUBLE_JAC_ALT) */
@@ -1517,7 +1514,7 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi T1, T2, T3, T4;
+    mbedtls_mpi tmp[4];
 
     /* NOTE: Aliasing between input and output is allowed, so one has to make
      *       sure that at the point X,Y,Z are written, {P,Q}->{X,Y,Z} are no
@@ -1541,21 +1538,24 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     if( Q->Z.p != NULL && mbedtls_mpi_cmp_int( &Q->Z, 1 ) != 0 )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
-    mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 ); mbedtls_mpi_init( &T3 ); mbedtls_mpi_init( &T4 );
+    mbedtls_mpi_init( &tmp[0] );
+    mbedtls_mpi_init( &tmp[1] );
+    mbedtls_mpi_init( &tmp[2] );
+    mbedtls_mpi_init( &tmp[3] );
 
-    MPI_ECP_SQR( &T1,  &P->Z         );
-    MPI_ECP_MUL( &T2,  &T1,    &P->Z );
-    MPI_ECP_MUL( &T1,  &T1,    &Q->X );
-    MPI_ECP_MUL( &T2,  &T2,    &Q->Y );
-    MPI_ECP_SUB( &T1,  &T1,    &P->X );
-    MPI_ECP_SUB( &T2,  &T2,    &P->Y );
+    MPI_ECP_SQR( &tmp[0], &P->Z         );
+    MPI_ECP_MUL( &tmp[1], &tmp[0], &P->Z );
+    MPI_ECP_MUL( &tmp[0], &tmp[0], &Q->X );
+    MPI_ECP_MUL( &tmp[1], &tmp[1], &Q->Y );
+    MPI_ECP_SUB( &tmp[0], &tmp[0], &P->X );
+    MPI_ECP_SUB( &tmp[1], &tmp[1], &P->Y );
 
     /* Special cases (2) and (3) */
-    if( mbedtls_mpi_cmp_int( &T1, 0 ) == 0 )
+    if( mbedtls_mpi_cmp_int( &tmp[0], 0 ) == 0 )
     {
-        if( mbedtls_mpi_cmp_int( &T2, 0 ) == 0 )
+        if( mbedtls_mpi_cmp_int( &tmp[1], 0 ) == 0 )
         {
-            ret = ecp_double_jac( grp, R, P );
+            ret = ecp_double_jac( grp, R, P, tmp );
             goto cleanup;
         }
         else
@@ -1566,27 +1566,30 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     }
 
     /* {P,Q}->Z no longer used, so OK to write to Z even if there's aliasing. */
-    MPI_ECP_MUL( Z,    &P->Z,  &T1   );
-    MPI_ECP_SQR( &T3,  &T1           );
-    MPI_ECP_MUL( &T4,  &T3,    &T1   );
-    MPI_ECP_MUL( &T3,  &T3,    &P->X );
+    MPI_ECP_MUL( Z,        &P->Z,    &tmp[0] );
+    MPI_ECP_SQR( &tmp[2],  &tmp[0]           );
+    MPI_ECP_MUL( &tmp[3],  &tmp[2],  &tmp[0] );
+    MPI_ECP_MUL( &tmp[2],  &tmp[2],  &P->X   );
 
-    MPI_ECP_MOV( &T1, &T3 );
-    MPI_ECP_SHIFT_L( &T1, 1 );
+    MPI_ECP_MOV( &tmp[0], &tmp[2] );
+    MPI_ECP_SHIFT_L( &tmp[0], 1 );
 
     /* {P,Q}->X no longer used, so OK to write to X even if there's aliasing. */
-    MPI_ECP_SQR( X,    &T2           );
-    MPI_ECP_SUB( X,    X,      &T1   );
-    MPI_ECP_SUB( X,    X,      &T4   );
-    MPI_ECP_SUB( &T3,  &T3,    X     );
-    MPI_ECP_MUL( &T3,  &T3,    &T2   );
-    MPI_ECP_MUL( &T4,  &T4,    &P->Y );
+    MPI_ECP_SQR( X,        &tmp[1]           );
+    MPI_ECP_SUB( X,        X,        &tmp[0] );
+    MPI_ECP_SUB( X,        X,        &tmp[3] );
+    MPI_ECP_SUB( &tmp[2],  &tmp[2],  X       );
+    MPI_ECP_MUL( &tmp[2],  &tmp[2],  &tmp[1] );
+    MPI_ECP_MUL( &tmp[3],  &tmp[3],  &P->Y   );
     /* {P,Q}->Y no longer used, so OK to write to Y even if there's aliasing. */
-    MPI_ECP_SUB( Y,    &T3,    &T4   );
+    MPI_ECP_SUB( Y,     &tmp[2],     &tmp[3] );
 
 cleanup:
 
-    mbedtls_mpi_free( &T1 ); mbedtls_mpi_free( &T2 ); mbedtls_mpi_free( &T3 ); mbedtls_mpi_free( &T4 );
+    mbedtls_mpi_free( &tmp[0] );
+    mbedtls_mpi_free( &tmp[1] );
+    mbedtls_mpi_free( &tmp[2] );
+    mbedtls_mpi_free( &tmp[3] );
 
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_ADD_MIXED_ALT) */
@@ -1775,6 +1778,13 @@ static int ecp_precompute_comb( const mbedtls_ecp_group *grp,
     const unsigned char T_size = 1U << ( w - 1 );
     mbedtls_ecp_point *cur, *TT[COMB_MAX_PRE - 1];
 
+    mbedtls_mpi tmp[4];
+
+    mbedtls_mpi_init( &tmp[0] );
+    mbedtls_mpi_init( &tmp[1] );
+    mbedtls_mpi_init( &tmp[2] );
+    mbedtls_mpi_init( &tmp[3] );
+
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL )
     {
@@ -1825,7 +1835,7 @@ dbl:
         if( j % d == 0 )
             MBEDTLS_MPI_CHK( mbedtls_ecp_copy( cur, T + ( i >> 1 ) ) );
 
-        MBEDTLS_MPI_CHK( ecp_double_jac( grp, cur, cur ) );
+        MBEDTLS_MPI_CHK( ecp_double_jac( grp, cur, cur, tmp ) );
     }
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
@@ -1884,6 +1894,12 @@ norm_add:
     MBEDTLS_MPI_CHK( ecp_normalize_jac_many( grp, TT, j ) );
 
 cleanup:
+
+    mbedtls_mpi_free( &tmp[0] );
+    mbedtls_mpi_free( &tmp[1] );
+    mbedtls_mpi_free( &tmp[2] );
+    mbedtls_mpi_free( &tmp[3] );
+
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL &&
         ret == MBEDTLS_ERR_ECP_IN_PROGRESS )
@@ -1940,9 +1956,14 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     mbedtls_ecp_point Txi;
+    mbedtls_mpi tmp[4];
     size_t i;
 
     mbedtls_ecp_point_init( &Txi );
+    mbedtls_mpi_init( &tmp[0] );
+    mbedtls_mpi_init( &tmp[1] );
+    mbedtls_mpi_init( &tmp[2] );
+    mbedtls_mpi_init( &tmp[3] );
 
 #if !defined(MBEDTLS_ECP_RESTARTABLE)
     (void) rs_ctx;
@@ -1978,7 +1999,7 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
         MBEDTLS_ECP_BUDGET( MBEDTLS_ECP_OPS_DBL + MBEDTLS_ECP_OPS_ADD );
         --i;
 
-        MBEDTLS_MPI_CHK( ecp_double_jac( grp, R, R ) );
+        MBEDTLS_MPI_CHK( ecp_double_jac( grp, R, R, tmp ) );
         MBEDTLS_MPI_CHK( ecp_select_comb( grp, &Txi, T, T_size, x[i] ) );
         MBEDTLS_MPI_CHK( ecp_add_mixed( grp, R, R, &Txi ) );
     }
@@ -1987,6 +2008,11 @@ cleanup:
 
     mbedtls_ecp_point_free( &Txi );
 
+    mbedtls_mpi_free( &tmp[0] );
+    mbedtls_mpi_free( &tmp[1] );
+    mbedtls_mpi_free( &tmp[2] );
+    mbedtls_mpi_free( &tmp[3] );
+
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL &&
         ret == MBEDTLS_ERR_ECP_IN_PROGRESS )

From 3b29f2194b924acec282d31d19c8c15da22b7814 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 07:34:14 +0000
Subject: [PATCH 13/21] Keep temporaries across iterations of ecp_add_mixed()

This saves heap operations.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 46b54504b4..294bd53c97 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1499,7 +1499,8 @@ cleanup:
  * Cost: 1A := 8M + 3S
  */
 static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
-                          const mbedtls_ecp_point *P, const mbedtls_ecp_point *Q )
+                          const mbedtls_ecp_point *P, const mbedtls_ecp_point *Q,
+                          mbedtls_mpi tmp[4] )
 {
 #if defined(MBEDTLS_SELF_TEST)
     add_count++;
@@ -1514,7 +1515,6 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    mbedtls_mpi tmp[4];
 
     /* NOTE: Aliasing between input and output is allowed, so one has to make
      *       sure that at the point X,Y,Z are written, {P,Q}->{X,Y,Z} are no
@@ -1538,11 +1538,6 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     if( Q->Z.p != NULL && mbedtls_mpi_cmp_int( &Q->Z, 1 ) != 0 )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
-    mbedtls_mpi_init( &tmp[0] );
-    mbedtls_mpi_init( &tmp[1] );
-    mbedtls_mpi_init( &tmp[2] );
-    mbedtls_mpi_init( &tmp[3] );
-
     MPI_ECP_SQR( &tmp[0], &P->Z         );
     MPI_ECP_MUL( &tmp[1], &tmp[0], &P->Z );
     MPI_ECP_MUL( &tmp[0], &tmp[0], &Q->X );
@@ -1586,11 +1581,6 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 
 cleanup:
 
-    mbedtls_mpi_free( &tmp[0] );
-    mbedtls_mpi_free( &tmp[1] );
-    mbedtls_mpi_free( &tmp[2] );
-    mbedtls_mpi_free( &tmp[3] );
-
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_ADD_MIXED_ALT) */
 }
@@ -1872,7 +1862,7 @@ add:
     {
         j = i;
         while( j-- )
-            MBEDTLS_MPI_CHK( ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] ) );
+            MBEDTLS_MPI_CHK( ecp_add_mixed( grp, &T[i + j], &T[j], &T[i], tmp ) );
     }
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
@@ -2001,7 +1991,7 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
 
         MBEDTLS_MPI_CHK( ecp_double_jac( grp, R, R, tmp ) );
         MBEDTLS_MPI_CHK( ecp_select_comb( grp, &Txi, T, T_size, x[i] ) );
-        MBEDTLS_MPI_CHK( ecp_add_mixed( grp, R, R, &Txi ) );
+        MBEDTLS_MPI_CHK( ecp_add_mixed( grp, R, R, &Txi, tmp ) );
     }
 
 cleanup:
@@ -2735,6 +2725,7 @@ int mbedtls_ecp_muladd_restartable(
     mbedtls_ecp_point mP;
     mbedtls_ecp_point *pmP = &mP;
     mbedtls_ecp_point *pR = R;
+    mbedtls_mpi tmp[4];
 #if defined(MBEDTLS_ECP_INTERNAL_ALT)
     char is_grp_capable = 0;
 #endif
@@ -2750,6 +2741,11 @@ int mbedtls_ecp_muladd_restartable(
 
     mbedtls_ecp_point_init( &mP );
 
+    mbedtls_mpi_init( &tmp[0] );
+    mbedtls_mpi_init( &tmp[1] );
+    mbedtls_mpi_init( &tmp[2] );
+    mbedtls_mpi_init( &tmp[3] );
+
     ECP_RS_ENTER( ma );
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
@@ -2790,7 +2786,7 @@ mul2:
 add:
 #endif
     MBEDTLS_ECP_BUDGET( MBEDTLS_ECP_OPS_ADD );
-    MBEDTLS_MPI_CHK( ecp_add_mixed( grp, pR, pmP, pR ) );
+    MBEDTLS_MPI_CHK( ecp_add_mixed( grp, pR, pmP, pR, tmp ) );
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->ma != NULL )
         rs_ctx->ma->state = ecp_rsma_norm;
@@ -2806,6 +2802,12 @@ norm:
 #endif
 
 cleanup:
+
+    mbedtls_mpi_free( &tmp[0] );
+    mbedtls_mpi_free( &tmp[1] );
+    mbedtls_mpi_free( &tmp[2] );
+    mbedtls_mpi_free( &tmp[3] );
+
 #if defined(MBEDTLS_ECP_INTERNAL_ALT)
     if( is_grp_capable )
         mbedtls_internal_ecp_free( grp );

From 30838868ac135e1bc6f26e08655dd9b54841bf8d Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Tue, 4 Jan 2022 13:25:59 +0000
Subject: [PATCH 14/21] Keep temporaries across iterations of
 ecp_double_add_mxz()

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 58 +++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 294bd53c97..43becc63e5 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -2400,7 +2400,8 @@ cleanup:
 static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
                                mbedtls_ecp_point *R, mbedtls_ecp_point *S,
                                const mbedtls_ecp_point *P, const mbedtls_ecp_point *Q,
-                               const mbedtls_mpi *d )
+                               const mbedtls_mpi *d,
+                               mbedtls_mpi T[4] )
 {
 #if defined(MBEDTLS_ECP_DOUBLE_ADD_MXZ_ALT)
     if( mbedtls_internal_ecp_grp_capable( grp ) )
@@ -2412,39 +2413,27 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
-    mbedtls_mpi T0,T1,T2,T3;
-
-    mbedtls_mpi_init( &T0 );
-    mbedtls_mpi_init( &T1 );
-    mbedtls_mpi_init( &T2 );
-    mbedtls_mpi_init( &T3 );
-
-    MPI_ECP_ADD( &T0,   &P->X,   &P->Z );
-    MPI_ECP_SUB( &T1,   &P->X,   &P->Z );
-    MPI_ECP_ADD( &T2,   &Q->X,   &Q->Z );
-    MPI_ECP_SUB( &T3,   &Q->X,   &Q->Z );
-    MPI_ECP_MUL( &T3,   &T3,     &T0   );
-    MPI_ECP_MUL( &T2,   &T2,     &T1   );
-    MPI_ECP_SQR( &T0,   &T0            );
-    MPI_ECP_SQR( &T1,   &T1            );
-    MPI_ECP_MUL( &R->X, &T0,     &T1   );
-    MPI_ECP_SUB( &T0,   &T0,     &T1   );
-    MPI_ECP_MUL( &R->Z, &grp->A, &T0   );
-    MPI_ECP_ADD( &R->Z, &T1,     &R->Z );
-    MPI_ECP_ADD( &S->X, &T3,     &T2   );
+    MPI_ECP_ADD( &T[0], &P->X,   &P->Z );
+    MPI_ECP_SUB( &T[1], &P->X,   &P->Z );
+    MPI_ECP_ADD( &T[2], &Q->X,   &Q->Z );
+    MPI_ECP_SUB( &T[3], &Q->X,   &Q->Z );
+    MPI_ECP_MUL( &T[3], &T[3],   &T[0] );
+    MPI_ECP_MUL( &T[2], &T[2],   &T[1] );
+    MPI_ECP_SQR( &T[0], &T[0]          );
+    MPI_ECP_SQR( &T[1], &T[1]          );
+    MPI_ECP_MUL( &R->X, &T[0],   &T[1] );
+    MPI_ECP_SUB( &T[0], &T[0],   &T[1] );
+    MPI_ECP_MUL( &R->Z, &grp->A, &T[0] );
+    MPI_ECP_ADD( &R->Z, &T[1],   &R->Z );
+    MPI_ECP_ADD( &S->X, &T[3],   &T[2] );
     MPI_ECP_SQR( &S->X, &S->X          );
-    MPI_ECP_SUB( &S->Z, &T3,     &T2   );
+    MPI_ECP_SUB( &S->Z, &T[3],   &T[2] );
     MPI_ECP_SQR( &S->Z, &S->Z          );
     MPI_ECP_MUL( &S->Z, d,       &S->Z );
-    MPI_ECP_MUL( &R->Z, &T0,     &R->Z );
+    MPI_ECP_MUL( &R->Z, &T[0],   &R->Z );
 
 cleanup:
 
-    mbedtls_mpi_free( &T0 );
-    mbedtls_mpi_free( &T1 );
-    mbedtls_mpi_free( &T2 );
-    mbedtls_mpi_free( &T3 );
-
     return( ret );
 #endif /* !defined(MBEDTLS_ECP_NO_FALLBACK) || !defined(MBEDTLS_ECP_DOUBLE_ADD_MXZ_ALT) */
 }
@@ -2463,8 +2452,14 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     unsigned char b;
     mbedtls_ecp_point RP;
     mbedtls_mpi PX;
+    mbedtls_mpi tmp[4];
     mbedtls_ecp_point_init( &RP ); mbedtls_mpi_init( &PX );
 
+    mbedtls_mpi_init( &tmp[0] );
+    mbedtls_mpi_init( &tmp[1] );
+    mbedtls_mpi_init( &tmp[2] );
+    mbedtls_mpi_init( &tmp[3] );
+
     if( f_rng == NULL )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
@@ -2497,7 +2492,7 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
          */
         MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->X, &RP.X, b ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->Z, &RP.Z, b ) );
-        MBEDTLS_MPI_CHK( ecp_double_add_mxz( grp, R, &RP, R, &RP, &PX ) );
+        MBEDTLS_MPI_CHK( ecp_double_add_mxz( grp, R, &RP, R, &RP, &PX, tmp ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->X, &RP.X, b ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->Z, &RP.Z, b ) );
     }
@@ -2519,6 +2514,11 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 cleanup:
     mbedtls_ecp_point_free( &RP ); mbedtls_mpi_free( &PX );
 
+    mbedtls_mpi_free( &tmp[0] );
+    mbedtls_mpi_free( &tmp[1] );
+    mbedtls_mpi_free( &tmp[2] );
+    mbedtls_mpi_free( &tmp[3] );
+
     return( ret );
 }
 

From 6a28870b1eb04181989c37a60e47150e2fb207d8 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Wed, 5 Jan 2022 05:19:48 +0000
Subject: [PATCH 15/21] Make ecp_select_comb() create valid EC point with Z
 coordinate set

Previously, ecp_select_comb() did not set the Z coordinate of the target point.
Instead, callers would either set it explicitly or leave it uninitialized,
relying on the (only partly upheld) convention that an uninitialized Z value
sometimes represents 1.

This commit modifies ecp_select_comb() to always set the Z coordinate to 1.
This comes at the cost of memory for a single coordinate, which seems worth
it for the increased robustness.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/library/ecp.c b/library/ecp.c
index 43becc63e5..c86d55d4db 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1927,6 +1927,8 @@ static int ecp_select_comb( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /* Safely invert result if i is "negative" */
     MBEDTLS_MPI_CHK( ecp_safe_invert_jac( grp, R, i >> 7 ) );
 
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &R->Z, 1 ) );
+
 cleanup:
     return( ret );
 }
@@ -1979,7 +1981,6 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
         /* Start with a non-zero point and randomize its coordinates */
         i = d;
         MBEDTLS_MPI_CHK( ecp_select_comb( grp, R, T, T_size, x[i] ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &R->Z, 1 ) );
         if( f_rng != 0 )
             MBEDTLS_MPI_CHK( ecp_randomize_jac( grp, R, f_rng, p_rng ) );
     }

From 595616e5cd97eb3965058851edfc048d0ee24db1 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Wed, 5 Jan 2022 08:28:24 +0000
Subject: [PATCH 16/21] Add more wrappers for internal ECP coordinate
 operations

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 92 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 34 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index c86d55d4db..501e5cb96c 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1185,30 +1185,60 @@ cleanup:
 }
 #endif /* All functions referencing mbedtls_mpi_shift_l_mod() are alt-implemented without fallback */
 
-#define MPI_ECP_ADD( X, A, B )                                      \
+/*
+ * Macro wrappers around ECP modular arithmetic
+ *
+ * Currently, these wrappers are defined via the bignum module.
+ */
+
+#define MPI_ECP_ADD( X, A, B )                                                  \
     MBEDTLS_MPI_CHK( mbedtls_mpi_add_mod( grp, X, A, B ) )
 
-#define MPI_ECP_SUB( X, A, B )                                      \
+#define MPI_ECP_SUB( X, A, B )                                                  \
     MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mod( grp, X, A, B ) )
 
-#define MPI_ECP_MUL( X, A, B )                                      \
+#define MPI_ECP_MUL( X, A, B )                                                  \
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X, A, B ) )
 
-#define MPI_ECP_SQR( X, A )                                         \
+#define MPI_ECP_SQR( X, A )                                                     \
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mod( grp, X, A, A ) )
 
-#define MPI_ECP_MUL_INT( X, A, c )                                  \
+#define MPI_ECP_MUL_INT( X, A, c )                                              \
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int_mod( grp, X, A, c ) )
 
-#define MPI_ECP_INV( dst, src )                                     \
+#define MPI_ECP_INV( dst, src )                                                 \
     MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( (dst), (src), &grp->P ) )
 
-#define MPI_ECP_MOV( X, A )                                         \
+#define MPI_ECP_MOV( X, A )                                                     \
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) )
 
-#define MPI_ECP_SHIFT_L( X, count )                                 \
+#define MPI_ECP_SHIFT_L( X, count )                                             \
     MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l_mod( grp, X, count ) )
 
+#define MPI_ECP_LSET( X, c )                                                    \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, c ) )
+
+#define MPI_ECP_CMP_INT( X, c )                                                 \
+    mbedtls_mpi_cmp_int( X, c )
+
+#define MPI_ECP_CMP( X, Y )                                                     \
+    mbedtls_mpi_cmp_mpi( X, Y )
+
+/* Needs f_rng, p_rng to be defined. */
+#define MPI_ECP_RAND( X )                                                       \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_random( (X), 2, &grp->P, f_rng, p_rng ) )
+
+/* Conditional negation
+ * Needs grp and a temporary MPI tmp to be defined. */
+#define MPI_ECP_COND_NEG( X, cond )                                        \
+    do                                                                     \
+    {                                                                      \
+        unsigned char nonzero = mbedtls_mpi_cmp_int( (X), 0 ) != 0;        \
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &tmp, &grp->P, (X) ) );      \
+        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( (X), &tmp,          \
+                                                       nonzero & cond ) ); \
+    } while( 0 )
+
 #if defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED)
 /*
  * For curves in short Weierstrass form, we do all the internal operations in
@@ -1224,7 +1254,7 @@ cleanup:
  */
 static int ecp_normalize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *pt )
 {
-    if( mbedtls_mpi_cmp_int( &pt->Z, 0 ) == 0 )
+    if( MPI_ECP_CMP_INT( &pt->Z, 0 ) == 0 )
         return( 0 );
 
 #if defined(MBEDTLS_ECP_NORMALIZE_JAC_ALT)
@@ -1245,7 +1275,7 @@ static int ecp_normalize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     MPI_ECP_MUL( &pt->X,   &pt->X,     &T );  /* X   <- X  * T = X / Z^2 */
     MPI_ECP_MUL( &pt->Y,   &pt->Y,     &T );  /* Y'' <- Y' * T = Y / Z^3 */
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &pt->Z, 1 ) );
+    MPI_ECP_LSET( &pt->Z, 1 );
 
 cleanup:
 
@@ -1371,19 +1401,13 @@ static int ecp_safe_invert_jac( const mbedtls_ecp_group *grp,
                             unsigned char inv )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    unsigned char nonzero;
-    mbedtls_mpi mQY;
+    mbedtls_mpi tmp;
+    mbedtls_mpi_init( &tmp );
 
-    mbedtls_mpi_init( &mQY );
-
-    /* Use the fact that -Q.Y mod P = P - Q.Y unless Q.Y == 0 */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &mQY, &grp->P, &Q->Y ) );
-    nonzero = mbedtls_mpi_cmp_int( &Q->Y, 0 ) != 0;
-    MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( &Q->Y, &mQY, inv & nonzero ) );
+    MPI_ECP_COND_NEG( &Q->Y, inv );
 
 cleanup:
-    mbedtls_mpi_free( &mQY );
-
+    mbedtls_mpi_free( &tmp );
     return( ret );
 }
 
@@ -1436,7 +1460,7 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         MPI_ECP_MUL_INT( &tmp[0],  &tmp[1],  3 );
 
         /* Optimize away for "koblitz" curves with A = 0 */
-        if( mbedtls_mpi_cmp_int( &grp->A, 0 ) != 0 )
+        if( MPI_ECP_CMP_INT( &grp->A, 0 ) != 0 )
         {
             /* M += A.Z^4 */
             MPI_ECP_SQR( &tmp[1],  &P->Z                );
@@ -1470,9 +1494,9 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     MPI_ECP_MUL(     &tmp[3],  &P->Y,  &P->Z   );
     MPI_ECP_SHIFT_L( &tmp[3],  1               );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->X, &tmp[2] ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Y, &tmp[1] ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R->Z, &tmp[3] ) );
+    MPI_ECP_MOV( &R->X, &tmp[2] );
+    MPI_ECP_MOV( &R->Y, &tmp[1] );
+    MPI_ECP_MOV( &R->Z, &tmp[3] );
 
 cleanup:
 
@@ -1546,9 +1570,9 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     MPI_ECP_SUB( &tmp[1], &tmp[1], &P->Y );
 
     /* Special cases (2) and (3) */
-    if( mbedtls_mpi_cmp_int( &tmp[0], 0 ) == 0 )
+    if( MPI_ECP_CMP_INT( &tmp[0], 0 ) == 0 )
     {
-        if( mbedtls_mpi_cmp_int( &tmp[1], 0 ) == 0 )
+        if( MPI_ECP_CMP_INT( &tmp[1], 0 ) == 0 )
         {
             ret = ecp_double_jac( grp, R, P, tmp );
             goto cleanup;
@@ -1609,7 +1633,7 @@ static int ecp_randomize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     mbedtls_mpi_init( &l );
 
     /* Generate l such that 1 < l < p */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_random( &l, 2, &grp->P, f_rng, p_rng ) );
+    MPI_ECP_RAND( &l );
 
     /* Z = l * Z */
     MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l );
@@ -1927,7 +1951,7 @@ static int ecp_select_comb( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /* Safely invert result if i is "negative" */
     MBEDTLS_MPI_CHK( ecp_safe_invert_jac( grp, R, i >> 7 ) );
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &R->Z, 1 ) );
+    MPI_ECP_LSET( &R->Z, 1 );
 
 cleanup:
     return( ret );
@@ -2338,7 +2362,7 @@ static int ecp_normalize_mxz( const mbedtls_ecp_group *grp, mbedtls_ecp_point *P
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     MPI_ECP_INV( &P->Z, &P->Z );
     MPI_ECP_MUL( &P->X, &P->X, &P->Z );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &P->Z, 1 ) );
+    MPI_ECP_LSET( &P->Z, 1 );
 
 cleanup:
     return( ret );
@@ -2369,7 +2393,7 @@ static int ecp_randomize_mxz( const mbedtls_ecp_group *grp, mbedtls_ecp_point *P
     mbedtls_mpi_init( &l );
 
     /* Generate l such that 1 < l < p */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_random( &l, 2, &grp->P, f_rng, p_rng ) );
+    MPI_ECP_RAND( &l );
 
     MPI_ECP_MUL( &P->X, &P->X, &l );
     MPI_ECP_MUL( &P->Z, &P->Z, &l );
@@ -2465,12 +2489,12 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
     /* Save PX and read from P before writing to R, in case P == R */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &PX, &P->X ) );
+    MPI_ECP_MOV( &PX, &P->X );
     MBEDTLS_MPI_CHK( mbedtls_ecp_copy( &RP, P ) );
 
     /* Set R to zero in modified x/z coordinates */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &R->X, 1 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &R->Z, 0 ) );
+    MPI_ECP_LSET( &R->X, 1 );
+    MPI_ECP_LSET( &R->Z, 0 );
     mbedtls_mpi_free( &R->Y );
 
     /* RP.X might be sligtly larger than P, so reduce it */
@@ -2664,7 +2688,7 @@ static int ecp_check_pubkey_sw( const mbedtls_ecp_group *grp, const mbedtls_ecp_
     MPI_ECP_MUL( &RHS, &RHS, &pt->X  );
     MPI_ECP_ADD( &RHS, &RHS, &grp->B );
 
-    if( mbedtls_mpi_cmp_mpi( &YY, &RHS ) != 0 )
+    if( MPI_ECP_CMP( &YY, &RHS ) != 0 )
         ret = MBEDTLS_ERR_ECP_INVALID_KEY;
 
 cleanup:

From c27a0e0093c14ba4f1a28b80ecd66e2637e7338a Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Thu, 6 Jan 2022 05:56:34 +0000
Subject: [PATCH 17/21] Add more wrappers for ECP MPI operations

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 501e5cb96c..47ac5c0903 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1239,6 +1239,17 @@ cleanup:
                                                        nonzero & cond ) ); \
     } while( 0 )
 
+#define MPI_ECP_NEG( X ) MPI_ECP_COND_NEG( (X), 1 )
+
+#define MPI_ECP_VALID( X )                      \
+    ( (X)->p != NULL )
+
+#define MPI_ECP_COND_ASSIGN( X, Y, cond )       \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( (X), (Y), (cond) ) )
+
+#define MPI_ECP_COND_SWAP( X, Y, cond )       \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( (X), (Y), (cond) ) )
+
 #if defined(MBEDTLS_ECP_SHORT_WEIERSTRASS_ENABLED)
 /*
  * For curves in short Weierstrass form, we do all the internal operations in
@@ -1550,16 +1561,16 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /*
      * Trivial cases: P == 0 or Q == 0 (case 1)
      */
-    if( mbedtls_mpi_cmp_int( &P->Z, 0 ) == 0 )
+    if( MPI_ECP_CMP_INT( &P->Z, 0 ) == 0 )
         return( mbedtls_ecp_copy( R, Q ) );
 
-    if( Q->Z.p != NULL && mbedtls_mpi_cmp_int( &Q->Z, 0 ) == 0 )
+    if( MPI_ECP_VALID( &Q->Z ) && MPI_ECP_CMP_INT( &Q->Z, 0 ) == 0 )
         return( mbedtls_ecp_copy( R, P ) );
 
     /*
      * Make sure Q coordinates are normalized
      */
-    if( Q->Z.p != NULL && mbedtls_mpi_cmp_int( &Q->Z, 1 ) != 0 )
+    if( MPI_ECP_VALID( &Q->Z ) && MPI_ECP_CMP_INT( &Q->Z, 1 ) != 0 )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
     MPI_ECP_SQR( &tmp[0], &P->Z         );
@@ -1944,8 +1955,8 @@ static int ecp_select_comb( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /* Read the whole table to thwart cache-based timing attacks */
     for( j = 0; j < T_size; j++ )
     {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( &R->X, &T[j].X, j == ii ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( &R->Y, &T[j].Y, j == ii ) );
+        MPI_ECP_COND_ASSIGN( &R->X, &T[j].X, j == ii );
+        MPI_ECP_COND_ASSIGN( &R->Y, &T[j].Y, j == ii );
     }
 
     /* Safely invert result if i is "negative" */
@@ -2229,8 +2240,8 @@ static int ecp_mul_comb( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 
     /* Is P the base point ? */
 #if MBEDTLS_ECP_FIXED_POINT_OPTIM == 1
-    p_eq_g = ( mbedtls_mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
-               mbedtls_mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
+    p_eq_g = ( MPI_ECP_CMP( &P->Y, &grp->G.Y ) == 0 &&
+               MPI_ECP_CMP( &P->X, &grp->G.X ) == 0 );
 #else
     p_eq_g = 0;
 #endif
@@ -2515,11 +2526,11 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
          *  else   double_add( R, RP, R, RP )
          * but using safe conditional swaps to avoid leaks
          */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->X, &RP.X, b ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->Z, &RP.Z, b ) );
+        MPI_ECP_COND_SWAP( &R->X, &RP.X, b );
+        MPI_ECP_COND_SWAP( &R->Z, &RP.Z, b );
         MBEDTLS_MPI_CHK( ecp_double_add_mxz( grp, R, &RP, R, &RP, &PX, tmp ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->X, &RP.X, b ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->Z, &RP.Z, b ) );
+        MPI_ECP_COND_SWAP( &R->X, &RP.X, b );
+        MPI_ECP_COND_SWAP( &R->Z, &RP.Z, b );
     }
 
     /*
@@ -2711,6 +2722,8 @@ static int mbedtls_ecp_mul_shortcuts( mbedtls_ecp_group *grp,
                                       mbedtls_ecp_restart_ctx *rs_ctx )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+    mbedtls_mpi tmp;
+    mbedtls_mpi_init( &tmp );
 
     if( mbedtls_mpi_cmp_int( m, 0 ) == 0 )
     {
@@ -2723,8 +2736,7 @@ static int mbedtls_ecp_mul_shortcuts( mbedtls_ecp_group *grp,
     else if( mbedtls_mpi_cmp_int( m, -1 ) == 0 )
     {
         MBEDTLS_MPI_CHK( mbedtls_ecp_copy( R, P ) );
-        if( mbedtls_mpi_cmp_int( &R->Y, 0 ) != 0 )
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
+        MPI_ECP_NEG( &R->Y );
     }
     else
     {
@@ -2733,6 +2745,8 @@ static int mbedtls_ecp_mul_shortcuts( mbedtls_ecp_group *grp,
     }
 
 cleanup:
+    mbedtls_mpi_free( &tmp );
+
     return( ret );
 }
 

From ee95f6c4c9726eed26d6cfbf19ce5f3125e9f991 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Sun, 9 Jan 2022 05:46:18 +0000
Subject: [PATCH 18/21] Don't allow Z coordinate being unset in ecp_add_mixed()

Previously, ecp_add_mixed(), computing say P+Q, would allow for the
Q parameter to have an unset Z coordinate as a shortcut for Z == 1.
This was leveraged during computation and usage of the T-table
(storing low multiples of the to-be-multiplied point on the curve).
It is a potentially error-prone corner case, though, since an MPI
with unset data pointer and limb size 0 is also a valid
representation of the number 0.

As a first step towards removing ECP points with unset Z coordinate,
the constant time T-array getter ecp_select_comb() has previously
been modified to return 'full' mbedtls_ecp_point structures,
including a 1-initialized Z-coordinate.

Similarly, this commit ...

- Modifies ecp_normalize_jac_many() to set the Z coordinates
  of the points it operates on to 1 instead of freeing them.

- Frees the Z-coordinates of the T[]-array explicitly
  once the computation and normalization of the T-table has finished.

  As a minimal functional difference between old and new code,
  the new code also frees the Z-coordinate of T[0]=P, which the
  old code did not.

- Modifies ecp_add_mixed() to no longer allow unset Z coordinates.

Except for the post-precomputation storage form of the T[] array,
the code therefore no longer uses EC points with unset Z coordinate.

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 47ac5c0903..f5ae8ee471 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1386,7 +1386,8 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
          */
         MBEDTLS_MPI_CHK( mbedtls_mpi_shrink( &T[i]->X, grp->P.n ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_shrink( &T[i]->Y, grp->P.n ) );
-        mbedtls_mpi_free( &T[i]->Z );
+
+        MPI_ECP_LSET( &T[i]->Z, 1 );
 
         if( i == 0 )
             break;
@@ -1529,8 +1530,6 @@ cleanup:
  *   due to the choice of precomputed points in the modified comb method.
  * So branches for these cases do not leak secret information.
  *
- * We accept Q->Z being unset (saving memory in tables) as meaning 1.
- *
  * Cost: 1A := 8M + 3S
  */
 static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
@@ -1558,19 +1557,22 @@ static int ecp_add_mixed( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     mbedtls_mpi * const Y = &R->Y;
     mbedtls_mpi * const Z = &R->Z;
 
+    if( !MPI_ECP_VALID( &Q->Z ) )
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+
     /*
      * Trivial cases: P == 0 or Q == 0 (case 1)
      */
     if( MPI_ECP_CMP_INT( &P->Z, 0 ) == 0 )
         return( mbedtls_ecp_copy( R, Q ) );
 
-    if( MPI_ECP_VALID( &Q->Z ) && MPI_ECP_CMP_INT( &Q->Z, 0 ) == 0 )
+    if( MPI_ECP_CMP_INT( &Q->Z, 0 ) == 0 )
         return( mbedtls_ecp_copy( R, P ) );
 
     /*
      * Make sure Q coordinates are normalized
      */
-    if( MPI_ECP_VALID( &Q->Z ) && MPI_ECP_CMP_INT( &Q->Z, 1 ) != 0 )
+    if( MPI_ECP_CMP_INT( &Q->Z, 1 ) != 0 )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
 
     MPI_ECP_SQR( &tmp[0], &P->Z         );
@@ -1918,6 +1920,14 @@ norm_add:
 
     MBEDTLS_MPI_CHK( ecp_normalize_jac_many( grp, TT, j ) );
 
+    /* Free Z coordinate (=1 after normalization) to save RAM.
+     * This makes T[i] invalid as mbedtls_ecp_points, but this is OK
+     * since from this point onwards, they are only accessed indirectly
+     * via the getter function ecp_select_comb() which does set the
+     * target's Z coordinate to 1. */
+    for( i = 0; i < T_size; i++ )
+        mbedtls_mpi_free( &T[i].Z );
+
 cleanup:
 
     mbedtls_mpi_free( &tmp[0] );

From ac4d4bc97c3016db4d2652731bc880858eb1051d Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Sun, 9 Jan 2022 05:58:49 +0000
Subject: [PATCH 19/21] Improve documentation of ECP module

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 72 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 32 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index f5ae8ee471..bfa3934036 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1334,7 +1334,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     mbedtls_mpi_init( &t );
 
     /*
-     * c[i] = Z_0 * ... * Z_i
+     * c[i] = Z_0 * ... * Z_i,   i = 0,..,n := T_size-1
      */
     MPI_ECP_MOV( &c[0], &T[0]->Z );
     for( i = 1; i < T_size; i++ )
@@ -1458,7 +1458,7 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /* Special case for A = -3 */
     if( grp->A.p == NULL )
     {
-        /* M = 3(X + Z^2)(X - Z^2) */
+        /* tmp[0] <- M = 3(X + Z^2)(X - Z^2) */
         MPI_ECP_SQR(     &tmp[1],  &P->Z                );
         MPI_ECP_ADD(     &tmp[2],  &P->X,  &tmp[1]      );
         MPI_ECP_SUB(     &tmp[3],  &P->X,  &tmp[1]      );
@@ -1467,7 +1467,7 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     }
     else
     {
-        /* M = 3.X^2 */
+        /* tmp[0] <- M = 3.X^2 + A.Z^4 */
         MPI_ECP_SQR(     &tmp[1],  &P->X  );
         MPI_ECP_MUL_INT( &tmp[0],  &tmp[1],  3 );
 
@@ -1482,30 +1482,31 @@ static int ecp_double_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
         }
     }
 
-    /* S = 4.X.Y^2 */
+    /* tmp[1] <- S = 4.X.Y^2 */
     MPI_ECP_SQR(     &tmp[2],  &P->Y     );
     MPI_ECP_SHIFT_L( &tmp[2],  1         );
     MPI_ECP_MUL(     &tmp[1],  &P->X, &tmp[2] );
     MPI_ECP_SHIFT_L( &tmp[1],  1         );
 
-    /* U = 8.Y^4 */
+    /* tmp[3] <- U = 8.Y^4 */
     MPI_ECP_SQR(     &tmp[3],  &tmp[2] );
     MPI_ECP_SHIFT_L( &tmp[3],  1       );
 
-    /* T = M^2 - 2.S */
+    /* tmp[2] <- T = M^2 - 2.S */
     MPI_ECP_SQR( &tmp[2],  &tmp[0]     );
     MPI_ECP_SUB( &tmp[2],  &tmp[2], &tmp[1] );
     MPI_ECP_SUB( &tmp[2],  &tmp[2], &tmp[1] );
 
-    /* S = M(S - T) - U */
+    /* tmp[1] <- S = M(S - T) - U */
     MPI_ECP_SUB( &tmp[1],  &tmp[1],     &tmp[2] );
     MPI_ECP_MUL( &tmp[1],  &tmp[1],     &tmp[0] );
     MPI_ECP_SUB( &tmp[1],  &tmp[1],     &tmp[3] );
 
-    /* U = 2.Y.Z */
+    /* tmp[3] <- U = 2.Y.Z */
     MPI_ECP_MUL(     &tmp[3],  &P->Y,  &P->Z   );
     MPI_ECP_SHIFT_L( &tmp[3],  1               );
 
+    /* Store results */
     MPI_ECP_MOV( &R->X, &tmp[2] );
     MPI_ECP_MOV( &R->Y, &tmp[1] );
     MPI_ECP_MOV( &R->Z, &tmp[3] );
@@ -1522,6 +1523,10 @@ cleanup:
  * The coordinates of Q must be normalized (= affine),
  * but those of P don't need to. R is not normalized.
  *
+ * P,Q,R may alias, but only at the level of EC points: they must be either
+ * equal as pointers, or disjoint (including the coordinate data buffers).
+ * Fine-grained aliasing at the level of coordinates is not supported.
+ *
  * Special cases: (1) P or Q is zero, (2) R is zero, (3) P == Q.
  * None of these cases can happen as intermediate step in ecp_mul_comb():
  * - at each step, P, Q and R are multiples of the base point, the factor
@@ -1648,17 +1653,17 @@ static int ecp_randomize_jac( const mbedtls_ecp_group *grp, mbedtls_ecp_point *p
     /* Generate l such that 1 < l < p */
     MPI_ECP_RAND( &l );
 
-    /* Z = l * Z */
+    /* Z' = l * Z */
     MPI_ECP_MUL( &pt->Z,   &pt->Z,     &l );
 
-    /* Y = l * Z */
+    /* Y' = l * Y */
     MPI_ECP_MUL( &pt->Y,   &pt->Y,     &l );
 
-    /* X = l^2 * X */
+    /* X' = l^2 * X */
     MPI_ECP_SQR( &l,       &l             );
     MPI_ECP_MUL( &pt->X,   &pt->X,     &l );
 
-    /* Y = l^3 * Y */
+    /* Y'' = l^2 * Y' = l^3 * Y */
     MPI_ECP_MUL( &pt->Y,   &pt->Y,     &l );
 
 cleanup:
@@ -1872,8 +1877,11 @@ dbl:
 norm_dbl:
 #endif
     /*
-     * Normalize current elements in T. As T has holes,
-     * use an auxiliary array of pointers to elements in T.
+     * Normalize current elements in T to allow them to be used in
+     * ecp_add_mixed() below, which requires one normalized input.
+     *
+     * As T has holes, use an auxiliary array of pointers to elements in T.
+     *
      */
     j = 0;
     for( i = 1; i < T_size; i <<= 1 )
@@ -2459,24 +2467,24 @@ static int ecp_double_add_mxz( const mbedtls_ecp_group *grp,
 #else
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
-    MPI_ECP_ADD( &T[0], &P->X,   &P->Z );
-    MPI_ECP_SUB( &T[1], &P->X,   &P->Z );
-    MPI_ECP_ADD( &T[2], &Q->X,   &Q->Z );
-    MPI_ECP_SUB( &T[3], &Q->X,   &Q->Z );
-    MPI_ECP_MUL( &T[3], &T[3],   &T[0] );
-    MPI_ECP_MUL( &T[2], &T[2],   &T[1] );
-    MPI_ECP_SQR( &T[0], &T[0]          );
-    MPI_ECP_SQR( &T[1], &T[1]          );
-    MPI_ECP_MUL( &R->X, &T[0],   &T[1] );
-    MPI_ECP_SUB( &T[0], &T[0],   &T[1] );
-    MPI_ECP_MUL( &R->Z, &grp->A, &T[0] );
-    MPI_ECP_ADD( &R->Z, &T[1],   &R->Z );
-    MPI_ECP_ADD( &S->X, &T[3],   &T[2] );
-    MPI_ECP_SQR( &S->X, &S->X          );
-    MPI_ECP_SUB( &S->Z, &T[3],   &T[2] );
-    MPI_ECP_SQR( &S->Z, &S->Z          );
-    MPI_ECP_MUL( &S->Z, d,       &S->Z );
-    MPI_ECP_MUL( &R->Z, &T[0],   &R->Z );
+    MPI_ECP_ADD( &T[0], &P->X,   &P->Z ); /* Pp := PX + PZ                    */
+    MPI_ECP_SUB( &T[1], &P->X,   &P->Z ); /* Pm := PX - PZ                    */
+    MPI_ECP_ADD( &T[2], &Q->X,   &Q->Z ); /* Qp := QX + XZ                    */
+    MPI_ECP_SUB( &T[3], &Q->X,   &Q->Z ); /* Qm := QX - QZ                    */
+    MPI_ECP_MUL( &T[3], &T[3],   &T[0] ); /* Qm * Pp                          */
+    MPI_ECP_MUL( &T[2], &T[2],   &T[1] ); /* Qp * Pm                          */
+    MPI_ECP_SQR( &T[0], &T[0]          ); /* Pp^2                             */
+    MPI_ECP_SQR( &T[1], &T[1]          ); /* Pm^2                             */
+    MPI_ECP_MUL( &R->X, &T[0],   &T[1] ); /* Pp^2 * Pm^2                      */
+    MPI_ECP_SUB( &T[0], &T[0],   &T[1] ); /* Pp^2 - Pm^2                      */
+    MPI_ECP_MUL( &R->Z, &grp->A, &T[0] ); /* A * (Pp^2 - Pm^2)                */
+    MPI_ECP_ADD( &R->Z, &T[1],   &R->Z ); /* [ A * (Pp^2-Pm^2) ] + Pm^2       */
+    MPI_ECP_ADD( &S->X, &T[3],   &T[2] ); /* Qm*Pp + Qp*Pm                    */
+    MPI_ECP_SQR( &S->X, &S->X          ); /* (Qm*Pp + Qp*Pm)^2                */
+    MPI_ECP_SUB( &S->Z, &T[3],   &T[2] ); /* Qm*Pp - Qp*Pm                    */
+    MPI_ECP_SQR( &S->Z, &S->Z          ); /* (Qm*Pp - Qp*Pm)^2                */
+    MPI_ECP_MUL( &S->Z, d,       &S->Z ); /* d * ( Qm*Pp - Qp*Pm )^2          */
+    MPI_ECP_MUL( &R->Z, &T[0],   &R->Z ); /* [A*(Pp^2-Pm^2)+Pm^2]*(Pp^2-Pm^2) */
 
 cleanup:
 

From 466df6e713fb743a7960f45774ba8470a6ef4504 Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Mon, 10 Jan 2022 11:16:51 +0000
Subject: [PATCH 20/21] Introduce helper function for init/free of MPI array

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 55 +++++++++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index bfa3934036..5758ca9799 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -342,6 +342,18 @@ int mbedtls_ecp_check_budget( const mbedtls_ecp_group *grp,
 
 #endif /* MBEDTLS_ECP_RESTARTABLE */
 
+static void mpi_init_many( mbedtls_mpi *arr, unsigned size )
+{
+    while( size-- )
+        mbedtls_mpi_init( arr++ );
+}
+
+static void mpi_free_many( mbedtls_mpi *arr, unsigned size )
+{
+    while( size-- )
+        mbedtls_mpi_free( arr++ );
+}
+
 /*
  * List of supported curves:
  *  - internal ID
@@ -1812,10 +1824,7 @@ static int ecp_precompute_comb( const mbedtls_ecp_group *grp,
 
     mbedtls_mpi tmp[4];
 
-    mbedtls_mpi_init( &tmp[0] );
-    mbedtls_mpi_init( &tmp[1] );
-    mbedtls_mpi_init( &tmp[2] );
-    mbedtls_mpi_init( &tmp[3] );
+    mpi_init_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL )
@@ -1938,10 +1947,7 @@ norm_add:
 
 cleanup:
 
-    mbedtls_mpi_free( &tmp[0] );
-    mbedtls_mpi_free( &tmp[1] );
-    mbedtls_mpi_free( &tmp[2] );
-    mbedtls_mpi_free( &tmp[3] );
+    mpi_free_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL &&
@@ -2005,10 +2011,7 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
     size_t i;
 
     mbedtls_ecp_point_init( &Txi );
-    mbedtls_mpi_init( &tmp[0] );
-    mbedtls_mpi_init( &tmp[1] );
-    mbedtls_mpi_init( &tmp[2] );
-    mbedtls_mpi_init( &tmp[3] );
+    mpi_init_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
 #if !defined(MBEDTLS_ECP_RESTARTABLE)
     (void) rs_ctx;
@@ -2051,11 +2054,7 @@ static int ecp_mul_comb_core( const mbedtls_ecp_group *grp, mbedtls_ecp_point *R
 cleanup:
 
     mbedtls_ecp_point_free( &Txi );
-
-    mbedtls_mpi_free( &tmp[0] );
-    mbedtls_mpi_free( &tmp[1] );
-    mbedtls_mpi_free( &tmp[2] );
-    mbedtls_mpi_free( &tmp[3] );
+    mpi_free_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
 #if defined(MBEDTLS_ECP_RESTARTABLE)
     if( rs_ctx != NULL && rs_ctx->rsm != NULL &&
@@ -2509,10 +2508,7 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     mbedtls_mpi tmp[4];
     mbedtls_ecp_point_init( &RP ); mbedtls_mpi_init( &PX );
 
-    mbedtls_mpi_init( &tmp[0] );
-    mbedtls_mpi_init( &tmp[1] );
-    mbedtls_mpi_init( &tmp[2] );
-    mbedtls_mpi_init( &tmp[3] );
+    mpi_init_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
     if( f_rng == NULL )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
@@ -2568,11 +2564,7 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
 cleanup:
     mbedtls_ecp_point_free( &RP ); mbedtls_mpi_free( &PX );
 
-    mbedtls_mpi_free( &tmp[0] );
-    mbedtls_mpi_free( &tmp[1] );
-    mbedtls_mpi_free( &tmp[2] );
-    mbedtls_mpi_free( &tmp[3] );
-
+    mpi_free_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
     return( ret );
 }
 
@@ -2797,11 +2789,7 @@ int mbedtls_ecp_muladd_restartable(
         return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
 
     mbedtls_ecp_point_init( &mP );
-
-    mbedtls_mpi_init( &tmp[0] );
-    mbedtls_mpi_init( &tmp[1] );
-    mbedtls_mpi_init( &tmp[2] );
-    mbedtls_mpi_init( &tmp[3] );
+    mpi_init_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
     ECP_RS_ENTER( ma );
 
@@ -2860,10 +2848,7 @@ norm:
 
 cleanup:
 
-    mbedtls_mpi_free( &tmp[0] );
-    mbedtls_mpi_free( &tmp[1] );
-    mbedtls_mpi_free( &tmp[2] );
-    mbedtls_mpi_free( &tmp[3] );
+    mpi_free_many( tmp, sizeof( tmp ) / sizeof( mbedtls_mpi ) );
 
 #if defined(MBEDTLS_ECP_INTERNAL_ALT)
     if( is_grp_capable )

From bae30235764dc5e9606b5cc2ed77f4cb9fdbde5f Mon Sep 17 00:00:00 2001
From: Hanno Becker <hanno.becker@arm.com>
Date: Mon, 10 Jan 2022 12:25:05 +0000
Subject: [PATCH 21/21] Make more use of helper function for init/free of MPI
 array

Signed-off-by: Hanno Becker <hanno.becker@arm.com>
---
 library/ecp.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 5758ca9799..ba76abbd15 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -342,13 +342,13 @@ int mbedtls_ecp_check_budget( const mbedtls_ecp_group *grp,
 
 #endif /* MBEDTLS_ECP_RESTARTABLE */
 
-static void mpi_init_many( mbedtls_mpi *arr, unsigned size )
+static void mpi_init_many( mbedtls_mpi *arr, size_t size )
 {
     while( size-- )
         mbedtls_mpi_init( arr++ );
 }
 
-static void mpi_free_many( mbedtls_mpi *arr, unsigned size )
+static void mpi_free_many( mbedtls_mpi *arr, size_t size )
 {
     while( size-- )
         mbedtls_mpi_free( arr++ );
@@ -1340,11 +1340,9 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
     if( ( c = mbedtls_calloc( T_size, sizeof( mbedtls_mpi ) ) ) == NULL )
         return( MBEDTLS_ERR_ECP_ALLOC_FAILED );
 
-    for( i = 0; i < T_size; i++ )
-        mbedtls_mpi_init( &c[i] );
-
     mbedtls_mpi_init( &t );
 
+    mpi_init_many( c, T_size );
     /*
      * c[i] = Z_0 * ... * Z_i,   i = 0,..,n := T_size-1
      */
@@ -1408,8 +1406,7 @@ static int ecp_normalize_jac_many( const mbedtls_ecp_group *grp,
 cleanup:
 
     mbedtls_mpi_free( &t );
-    for( i = 0; i < T_size; i++ )
-        mbedtls_mpi_free( &c[i] );
+    mpi_free_many( c, T_size );
     mbedtls_free( c );
 
     return( ret );