From 8e7d6a03869e4578c16d98bcc7309a09df4dc776 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 13:27:40 +0100
Subject: [PATCH 01/16] mpi_exp_mod: load the output variable to the table

This is done in preparation for constant time loading that will be added
in a later commit.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 8717c8abc6..1c42ef21ba 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1974,7 +1974,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[ 1 << MBEDTLS_MPI_WINDOW_SIZE ], WW, Apos;
+    mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2021,6 +2021,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
+    /*
+     * Append the output variable to the end of the  table for constant time
+     * lookup. From this point on we need to use the table entry in each
+     * calculation, this makes it safe to use simple assignment.
+     */
+    const size_t x_index = sizeof( W ) / sizeof( W[0] ) - 1;
+    W[x_index] = *X;
+
     /*
      * Compensate for negative A (and correct at the end)
      */
@@ -2066,10 +2074,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     mpi_montmul( &W[1], &RR, N, mm, &T );
 
     /*
-     * X = R^2 * R^-1 mod N = R mod N
+     * W[x_index] = R^2 * R^-1 mod N = R mod N
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) );
-    mpi_montred( X, N, mm, &T );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
+    mpi_montred( &W[x_index], N, mm, &T );
 
     if( wsize > 1 )
     {
@@ -2127,9 +2135,9 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         if( ei == 0 && state == 1 )
         {
             /*
-             * out of window, square X
+             * out of window, square W[x_index]
              */
-            mpi_montmul( X, X, N, mm, &T );
+            mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
             continue;
         }
 
@@ -2144,16 +2152,16 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         if( nbits == wsize )
         {
             /*
-             * X = X^wsize R^-1 mod N
+             * W[x_index] = W[x_index]^wsize R^-1 mod N
              */
             for( i = 0; i < wsize; i++ )
-                mpi_montmul( X, X, N, mm, &T );
+                mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
 
             /*
-             * X = X * W[wbits] R^-1 mod N
+             * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
              */
             MBEDTLS_MPI_CHK( mpi_select( &WW, W, (size_t) 1 << wsize, wbits ) );
-            mpi_montmul( X, &WW, N, mm, &T );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
             nbits = 0;
@@ -2166,25 +2174,30 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        mpi_montmul( X, X, N, mm, &T );
+        mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
 
         wbits <<= 1;
 
         if( ( wbits & ( one << wsize ) ) != 0 )
-            mpi_montmul( X, &W[1], N, mm, &T );
+            mpi_montmul( &W[x_index], &W[1], N, mm, &T );
     }
 
     /*
-     * X = A^E * R * R^-1 mod N = A^E mod N
+     * W[x_index] = A^E * R * R^-1 mod N = A^E mod N
      */
-    mpi_montred( X, N, mm, &T );
+    mpi_montred( &W[x_index], N, mm, &T );
 
     if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 )
     {
-        X->s = -1;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) );
+        W[x_index].s = -1;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &W[x_index], N, &W[x_index] ) );
     }
 
+    /*
+     * Load the result in the output variable.
+     */
+    *X = W[x_index];
+
 cleanup:
 
     for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )

From b764ee1603bc03d3838ed5814cd1da6940a92fac Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 14:00:09 +0100
Subject: [PATCH 02/16] mpi_exp_mod: protect out of window zeroes

Out of window zeroes were doing squaring on the output variable
directly. This leaks the position of windows and the out of window
zeroes.

Loading the output variable from the table in constant time removes this
leakage.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 1c42ef21ba..2ba6b7c97f 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1975,6 +1975,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
+    const size_t w_count = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2026,7 +2027,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * lookup. From this point on we need to use the table entry in each
      * calculation, this makes it safe to use simple assignment.
      */
-    const size_t x_index = sizeof( W ) / sizeof( W[0] ) - 1;
+    const size_t x_index = w_count - 1;
     W[x_index] = *X;
 
     /*
@@ -2137,7 +2138,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
 
@@ -2155,12 +2157,15 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
              * W[x_index] = W[x_index]^wsize R^-1 mod N
              */
             for( i = 0; i < wsize; i++ )
-                mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+            {
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+                mpi_montmul( &W[x_index], &WW, N, mm, &T );
+            }
 
             /*
              * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, (size_t) 1 << wsize, wbits ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, wbits ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
@@ -2174,12 +2179,16 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+        mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
         wbits <<= 1;
 
         if( ( wbits & ( one << wsize ) ) != 0 )
-            mpi_montmul( &W[x_index], &W[1], N, mm, &T );
+        {
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, 1 ) );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
+        }
     }
 
     /*

From b3608afe29b7743de13923e6683ae74e34f77b46 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 14:57:17 +0100
Subject: [PATCH 03/16] Add ChangeLog entry

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 ChangeLog.d/rsa-fix-priviliged-side-channel.txt

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
new file mode 100644
index 0000000000..d4ffa915ca
--- /dev/null
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -0,0 +1,8 @@
+Security
+   * An adversary with access to precise enough information about memory
+     accesses (typically, an untrusted operating system attacking a secure
+     enclave) could recover an RSA private key after observing the victim
+     performing a single private-key operation if the window size used for the
+     exponentiation was 3 or smaller. Found and reported by Zili KOU,
+     Wenjian HE, Sharad Sinha, and Wei ZHANG.
+

From f08b40eaabfc22b2422e8bd300f3ccdfcaded591 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Fri, 11 Nov 2022 15:56:38 +0000
Subject: [PATCH 04/16] mpi_exp_mod: improve documentation

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 2ba6b7c97f..dbf5295052 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2023,11 +2023,20 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
     /*
-     * Append the output variable to the end of the  table for constant time
-     * lookup. From this point on we need to use the table entry in each
-     * calculation, this makes it safe to use simple assignment.
+     * If we call mpi_montmul() without doing a table lookup first, we leak
+     * through timing side channels the fact that a squaring is happening. In
+     * some strong attack settings this can be enough to defeat blinding.
+     *
+     * To prevent this leak, we append the output variable to the end of the
+     * table. This allows as to always do a constant time lookup whenever we
+     * call mpi_montmul().
      */
     const size_t x_index = w_count - 1;
+    /*
+     * To prevent the leak, we need to use the table entry in each calculation
+     * from this point on. This makes it safe to load X into the table by a
+     * simple assignment.
+     */
     W[x_index] = *X;
 
     /*

From 844614814e14e3ab691763263b5102affbdaf7f1 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:31:22 +0000
Subject: [PATCH 05/16] mpi_exp_mod: remove memory ownership confusion

Elements of W didn't all have the same owner: all were owned by this
function, except W[x_index]. It is more robust if we make a proper copy
of X.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index dbf5295052..a564edf6cc 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2012,16 +2012,6 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         wsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
-    j = N->n + 1;
-    /* All W[i] and X must have at least N->n limbs for the mpi_montmul()
-     * and mpi_montred() calls later. Here we ensure that W[1] and X are
-     * large enough, and later we'll grow other W[i] to the same length.
-     * They must not be shrunk midway through this function!
-     */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
-
     /*
      * If we call mpi_montmul() without doing a table lookup first, we leak
      * through timing side channels the fact that a squaring is happening. In
@@ -2030,14 +2020,23 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To prevent this leak, we append the output variable to the end of the
      * table. This allows as to always do a constant time lookup whenever we
      * call mpi_montmul().
+     *
+     * To achieve this, we make a copy of X and we use the table entry in each
+     * calculation from this point on.
      */
     const size_t x_index = w_count - 1;
-    /*
-     * To prevent the leak, we need to use the table entry in each calculation
-     * from this point on. This makes it safe to load X into the table by a
-     * simple assignment.
+    mbedtls_mpi_init( &W[x_index] );
+    mbedtls_mpi_copy( &W[x_index], X );
+
+    j = N->n + 1;
+    /* All W[i] and X must have at least N->n limbs for the mpi_montmul()
+     * and mpi_montred() calls later. Here we ensure that W[1] and X are
+     * large enough, and later we'll grow other W[i] to the same length.
+     * They must not be shrunk midway through this function!
      */
-    W[x_index] = *X;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[x_index], j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
     /*
      * Compensate for negative A (and correct at the end)
@@ -2214,14 +2213,17 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     /*
      * Load the result in the output variable.
      */
-    *X = W[x_index];
+    mbedtls_mpi_copy( X, &W[x_index] );
 
 cleanup:
 
     for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
         mbedtls_mpi_free( &W[i] );
 
-    mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos );
+    mbedtls_mpi_free( &W[1] );
+    mbedtls_mpi_free( &W[x_index] );
+    mbedtls_mpi_free( &T );
+    mbedtls_mpi_free( &Apos );
     mbedtls_mpi_free( &WW );
 
     if( prec_RR == NULL || prec_RR->p == NULL )

From 7fa11b88f3d223edca5bf3169b8b2fe93b498210 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:48:02 +0000
Subject: [PATCH 06/16] mpi_exp_mod: rename local variables

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 54 +++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index a564edf6cc..b611c31105 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1970,12 +1970,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
                          mbedtls_mpi *prec_RR )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t wbits, wsize, one = 1;
+    size_t window_bitsize, one = 1;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
-    const size_t w_count = sizeof( W ) / sizeof( W[0] );
+    const size_t w_table_size = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2004,12 +2004,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     i = mbedtls_mpi_bitlen( E );
 
-    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
+    window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
-    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
-        wsize = MBEDTLS_MPI_WINDOW_SIZE;
+    if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
+        window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
     /*
@@ -2024,7 +2024,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_count - 1;
+    const size_t x_index = w_table_size - 1;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2088,23 +2088,23 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
     mpi_montred( &W[x_index], N, mm, &T );
 
-    if( wsize > 1 )
+    if( window_bitsize > 1 )
     {
         /*
-         * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
+         * W[1 << (window_bitsize - 1)] = W[1] ^ (window_bitsize - 1)
          */
-        j =  one << ( wsize - 1 );
+        j =  one << ( window_bitsize - 1 );
 
         MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
 
-        for( i = 0; i < wsize - 1; i++ )
+        for( i = 0; i < window_bitsize - 1; i++ )
             mpi_montmul( &W[j], &W[j], N, mm, &T );
 
         /*
          * W[i] = W[i - 1] * W[1]
          */
-        for( i = j + 1; i < ( one << wsize ); i++ )
+        for( i = j + 1; i < ( one << window_bitsize ); i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2116,7 +2116,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     nblimbs = E->n;
     bufsize = 0;
     nbits   = 0;
-    wbits   = 0;
+    size_t exponent_bits_in_window = 0;
     state   = 0;
 
     while( 1 )
@@ -2146,7 +2146,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
@@ -2157,28 +2157,29 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         state = 2;
 
         nbits++;
-        wbits |= ( ei << ( wsize - nbits ) );
+        exponent_bits_in_window |= ( ei << ( window_bitsize - nbits ) );
 
-        if( nbits == wsize )
+        if( nbits == window_bitsize )
         {
             /*
-             * W[x_index] = W[x_index]^wsize R^-1 mod N
+             * W[x_index] = W[x_index]^window_bitsize R^-1 mod N
              */
-            for( i = 0; i < wsize; i++ )
+            for( i = 0; i < window_bitsize; i++ )
             {
-                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
                 mpi_montmul( &W[x_index], &WW, N, mm, &T );
             }
 
             /*
-             * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
+             * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, wbits ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size,
+                                         exponent_bits_in_window ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
             nbits = 0;
-            wbits = 0;
+            exponent_bits_in_window = 0;
         }
     }
 
@@ -2187,14 +2188,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
         mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
-        wbits <<= 1;
+        exponent_bits_in_window <<= 1;
 
-        if( ( wbits & ( one << wsize ) ) != 0 )
+        if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
         {
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, 1 ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
         }
     }
@@ -2217,7 +2218,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
 cleanup:
 
-    for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
+    for( i = ( one << ( window_bitsize - 1 ) );
+         i < ( one << window_bitsize ); i++ )
         mbedtls_mpi_free( &W[i] );
 
     mbedtls_mpi_free( &W[1] );

From 3646ff02ad96ed188734aed822c16d3815085465 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:55:05 +0000
Subject: [PATCH 07/16] mpi_exp_mod: move X next to the precomputed values

With small exponents (for example, when doing RSA-1024 with CRT, each
prime is 512 bits and we'll use wsize = 5 which may be smaller that the
maximum - or even worse when doing public RSA operations which typically
have a 16-bit exponent so we'll use wsize = 1) the usage of W will have
pre-computed values, then empty space, then the accumulator at the very
end.

Move X next to the precomputed values to make accesses more efficient
and intuitive.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index b611c31105..d3cd90d74e 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1975,7 +1975,6 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
-    const size_t w_table_size = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2006,6 +2005,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+    const size_t w_table_used_size = ( 1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
@@ -2024,7 +2024,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_table_size - 1;
+    const size_t x_index = w_table_used_size - 1;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2146,7 +2146,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
@@ -2166,14 +2166,15 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
              */
             for( i = 0; i < window_bitsize; i++ )
             {
-                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
+                                             x_index ) );
                 mpi_montmul( &W[x_index], &WW, N, mm, &T );
             }
 
             /*
              * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size,
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
                                          exponent_bits_in_window ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
@@ -2188,14 +2189,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
         mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
         exponent_bits_in_window <<= 1;
 
         if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
         {
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, 1 ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
         }
     }

From b2c2fca974b81282d2fe747e27909a6681869d51 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 15:05:31 +0000
Subject: [PATCH 08/16] mpi_exp_mod: simplify freeing loop

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index d3cd90d74e..9b47739703 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2219,12 +2219,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
 cleanup:
 
-    for( i = ( one << ( window_bitsize - 1 ) );
-         i < ( one << window_bitsize ); i++ )
+    /* The first bit of the sliding window is always 1 and therefore the first
+     * half of the table was unused. */
+    for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
     mbedtls_mpi_free( &W[1] );
-    mbedtls_mpi_free( &W[x_index] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );
     mbedtls_mpi_free( &WW );

From 74601209fa38426dc085c06aa9864e53b44bc996 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 15:54:20 +0000
Subject: [PATCH 09/16] mpi_exp_mod: remove the 'one' variable

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 9b47739703..4b2687b625 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1970,7 +1970,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
                          mbedtls_mpi *prec_RR )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t window_bitsize, one = 1;
+    size_t window_bitsize;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
@@ -2091,9 +2091,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     if( window_bitsize > 1 )
     {
         /*
-         * W[1 << (window_bitsize - 1)] = W[1] ^ (window_bitsize - 1)
+         * W[i] = W[1] ^ i
+         *
+         * The first bit of the sliding window is always 1 and therefore we
+         * only need to store the second half of the table.
          */
-        j =  one << ( window_bitsize - 1 );
+        j = w_table_used_size / 2;
 
         MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
@@ -2103,8 +2106,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         /*
          * W[i] = W[i - 1] * W[1]
+         * (The last element in the table is for the result X, so we don't need
+         * to calculate that.)
          */
-        for( i = j + 1; i < ( one << window_bitsize ); i++ )
+        for( i = j + 1; i < w_table_used_size - 1; i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2194,7 +2199,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         exponent_bits_in_window <<= 1;
 
-        if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
+        if( ( exponent_bits_in_window & ( (size_t) 1 << window_bitsize ) ) != 0 )
         {
             MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );

From be54ca77e243526fd7e9009882e90248e6c06c79 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 16:14:54 +0000
Subject: [PATCH 10/16] mpi_exp_mod: improve documentation

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 4b2687b625..b68a15e19b 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2013,13 +2013,33 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 #endif
 
     /*
-     * If we call mpi_montmul() without doing a table lookup first, we leak
-     * through timing side channels the fact that a squaring is happening. In
-     * some strong attack settings this can be enough to defeat blinding.
+     * This function is not constant-trace: its memory accesses depend on the
+     * exponent value. To defend against timing attacks, callers (such as RSA
+     * and DHM) should use exponent blinding. However this is not enough if the
+     * adversary can find the exponent in a single trace, so this function
+     * takes extra precautions against adversaries who can observe memory
+     * access patterns.
      *
-     * To prevent this leak, we append the output variable to the end of the
-     * table. This allows as to always do a constant time lookup whenever we
-     * call mpi_montmul().
+     * This function performs a series of multiplications by table elements and
+     * squarings, and we want the prevent the adversary from finding out which
+     * table element was used, and from distinguishing between multiplications
+     * and squarings. Firstly, when multiplying by an element of the window
+     * W[i], we do a constant-trace table lookup to obfuscate i. This leaves
+     * squarings as having a different memory access patterns from other
+     * multiplications. So secondly, we put the accumulator X in the table as
+     * well, and also do a constant-trace table lookup to multiply by X.
+     *
+     * This way, all multiplications take the form of a lookup-and-multiply.
+     * The number of lookup-and-multiply operations inside each iteration of
+     * the main loop still depends on the bits of the exponent, but since the
+     * other operations in the loop don't have an easily recognizable memory
+     * trace, an adversary is unlikely to be able to observe the exact
+     * patterns.
+     *
+     * An adversary may still be able to recover the exponent if they can
+     * observe both memory accesses and branches. However, branch prediction
+     * exploitation typically requires many traces of execution over the same
+     * data, which is defeated by randomized blinding.
      *
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.

From 74369b2497ed0c8e8ad2d9eed604a6b89804c222 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 16:22:35 +0000
Subject: [PATCH 11/16] Add paper title to Changelog

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
index d4ffa915ca..f112eae433 100644
--- a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -4,5 +4,6 @@ Security
      enclave) could recover an RSA private key after observing the victim
      performing a single private-key operation if the window size used for the
      exponentiation was 3 or smaller. Found and reported by Zili KOU,
-     Wenjian HE, Sharad Sinha, and Wei ZHANG.
+     Wenjian HE, Sharad Sinha, and Wei ZHANG. See "Cache Side-channel Attacks
+     and Defenses of the Sliding Window Algorithm in TEEs" - DATE 2023.
 

From 9c09326572b9baaac9530f9bd688c65ca241c010 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:15:00 +0000
Subject: [PATCH 12/16] mpi_mod_exp: be pedantic about right shift

The window size starts giving diminishing returns around 6 on most
platforms and highly unlikely to be more than 31 in practical use cases.
Still, compilers and static analysers might complain about this and
better to be pedantic.

Co-authored-by: Gilles Peskine <gilles.peskine@arm.com>
Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index b68a15e19b..53d16495b3 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2005,7 +2005,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
-    const size_t w_table_used_size = ( 1 << window_bitsize ) + 1;
+    const size_t w_table_used_size = ( (size_t)1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )

From 060009518b8cd6c65f0b32ae02da831235c7ef2a Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:18:06 +0000
Subject: [PATCH 13/16] mpi_exp_mod: fix out of bounds access

The table size was set before the configured window size bound was
applied which lead to out of bounds access when the configured window
size bound is less.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index 53d16495b3..618da377fc 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2005,13 +2005,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
-    const size_t w_table_used_size = ( (size_t)1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
         window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
+    const size_t w_table_used_size = ( (size_t) 1 << window_bitsize ) + 1;
+
     /*
      * This function is not constant-trace: its memory accesses depend on the
      * exponent value. To defend against timing attacks, callers (such as RSA

From c8d66d50d06e3fb801e4b87b4fec5658b300c191 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:47:10 +0000
Subject: [PATCH 14/16] mpi_exp_mod: reduce the table size by one

The first half of the table is not used, let's reuse index 0 for the
result instead of appending it in the end.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 618da377fc..993eb2aed9 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1974,7 +1974,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
+    mbedtls_mpi RR, T, W[ (size_t) 1 << MBEDTLS_MPI_WINDOW_SIZE ], WW, Apos;
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2011,7 +2011,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
-    const size_t w_table_used_size = ( (size_t) 1 << window_bitsize ) + 1;
+    const size_t w_table_used_size = (size_t) 1 << window_bitsize;
 
     /*
      * This function is not constant-trace: its memory accesses depend on the
@@ -2045,7 +2045,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_table_used_size - 1;
+    const size_t x_index = 0;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2109,6 +2109,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
     mpi_montred( &W[x_index], N, mm, &T );
 
+
     if( window_bitsize > 1 )
     {
         /*
@@ -2116,6 +2117,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
          *
          * The first bit of the sliding window is always 1 and therefore we
          * only need to store the second half of the table.
+         *
+         * (There are two special elements in the table: W[0] for the
+         * accumulator/result and W[1] for A in Montgomery form. Both of these
+         * are already set at this point.)
          */
         j = w_table_used_size / 2;
 
@@ -2127,10 +2132,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         /*
          * W[i] = W[i - 1] * W[1]
-         * (The last element in the table is for the result X, so we don't need
-         * to calculate that.)
          */
-        for( i = j + 1; i < w_table_used_size - 1; i++ )
+        for( i = j + 1; i < w_table_used_size; i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2250,6 +2253,7 @@ cleanup:
     for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
+    mbedtls_mpi_free( &W[0] );
     mbedtls_mpi_free( &W[1] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );

From 33480a372b4c618cc3ad3b6b16b0f42e37fad94c Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:51:25 +0000
Subject: [PATCH 15/16] Changelog: expand conference acronym for clarity

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
index f112eae433..bafe18d308 100644
--- a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -5,5 +5,6 @@ Security
      performing a single private-key operation if the window size used for the
      exponentiation was 3 or smaller. Found and reported by Zili KOU,
      Wenjian HE, Sharad Sinha, and Wei ZHANG. See "Cache Side-channel Attacks
-     and Defenses of the Sliding Window Algorithm in TEEs" - DATE 2023.
+     and Defenses of the Sliding Window Algorithm in TEEs" - Design, Automation
+     and Test in Europe 2023.
 

From 3165f063b56e1e233875f529a8b01dbd771cce60 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 15:00:46 +0000
Subject: [PATCH 16/16] mpi_exp_mod: use x_index consistently

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index 993eb2aed9..3f81d5de3b 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2253,7 +2253,7 @@ cleanup:
     for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
-    mbedtls_mpi_free( &W[0] );
+    mbedtls_mpi_free( &W[x_index] );
     mbedtls_mpi_free( &W[1] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );