From 161ef968db767b9f98592567d19f66959c619e30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Tue, 17 Sep 2013 19:13:10 +0200
Subject: [PATCH] Cache pre-computed points for ecp_mul()

Up to 1.25x speedup on ECDSA sign for small curves, but mainly useful as a
preparation for fixed-point multiplication (a few prototypes lost the const
qualifier on their ecp_group argument, since ecp_mul() now caches in it).
---
 include/polarssl/ecdh.h  |   4 +-
 include/polarssl/ecdsa.h |   4 +-
 include/polarssl/ecp.h   |  11 ++-
 library/ecdh.c           |   4 +-
 library/ecdsa.c          |   4 +-
 library/ecp.c            | 167 +++++++++++++++++++++++++++++++--------
 6 files changed, 147 insertions(+), 47 deletions(-)

diff --git a/include/polarssl/ecdh.h b/include/polarssl/ecdh.h
index 0fa2dfae1a..81c8f937a0 100644
--- a/include/polarssl/ecdh.h
+++ b/include/polarssl/ecdh.h
@@ -62,7 +62,7 @@ ecdh_context;
  * \return          0 if successful,
  *                  or a POLARSSL_ERR_ECP_XXX or POLARSSL_MPI_XXX error code
  */
-int ecdh_gen_public( const ecp_group *grp, mpi *d, ecp_point *Q,
+int ecdh_gen_public( ecp_group *grp, mpi *d, ecp_point *Q,
                      int (*f_rng)(void *, unsigned char *, size_t),
                      void *p_rng );
 
@@ -83,7 +83,7 @@ int ecdh_gen_public( const ecp_group *grp, mpi *d, ecp_point *Q,
  *                  countermeasures against potential elaborate timing
  *                  attacks, see \c ecp_mul() for details.
  */
-int ecdh_compute_shared( const ecp_group *grp, mpi *z,
+int ecdh_compute_shared( ecp_group *grp, mpi *z,
                          const ecp_point *Q, const mpi *d,
                          int (*f_rng)(void *, unsigned char *, size_t),
                          void *p_rng );
diff --git a/include/polarssl/ecdsa.h b/include/polarssl/ecdsa.h
index 3159d893c2..2de3b68d77 100644
--- a/include/polarssl/ecdsa.h
+++ b/include/polarssl/ecdsa.h
@@ -63,7 +63,7 @@ extern "C" {
  * \return          0 if successful,
  *                  or a POLARSSL_ERR_ECP_XXX or POLARSSL_MPI_XXX error code
  */
-int ecdsa_sign( const ecp_group *grp, mpi *r, mpi *s,
+int ecdsa_sign( ecp_group *grp, mpi *r, mpi *s,
                 const mpi *d, const unsigned char *buf, size_t blen,
                 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
 
@@ -81,7 +81,7 @@ int ecdsa_sign( const ecp_group *grp, mpi *r, mpi *s,
  *                  POLARSSL_ERR_ECP_BAD_INPUT_DATA if signature is invalid
  *                  or a POLARSSL_ERR_ECP_XXX or POLARSSL_MPI_XXX error code
  */
-int ecdsa_verify( const ecp_group *grp,
+int ecdsa_verify( ecp_group *grp,
                   const unsigned char *buf, size_t blen,
                   const ecp_point *Q, const mpi *r, const mpi *s);
 
diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index 513b355a1c..cd1568cf7d 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -155,16 +155,15 @@ ecp_keypair;
 
 /*
  * Maximum window size (actually, NAF width) used for point multipliation.
- * Default: 7.
+ * Default: 8.
  * Minimum value: 2. Maximum value: 8.
  *
  * Result is an array of at most ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) )
- * points used for point multiplication, so at most 64 by default.
- * In practice, most curves will use less precomputed points.
+ * points used for point multiplication.
  *
  * Reduction in size may reduce speed for big curves.
  */
-#define POLARSSL_ECP_WINDOW_SIZE    7   /**< Maximum NAF width used. */
+#define POLARSSL_ECP_WINDOW_SIZE    8   /**< Maximum NAF width used. */
 
 /*
  * Point formats, from RFC 4492's enum ECPointFormat
@@ -472,7 +471,7 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
  *                  has very low overhead, it is recommended to always provide
  *                  a non-NULL f_rng parameter when using secret inputs.
  */
-int ecp_mul( const ecp_group *grp, ecp_point *R,
+int ecp_mul( ecp_group *grp, ecp_point *R,
              const mpi *m, const ecp_point *P,
              int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
 
@@ -531,7 +530,7 @@ int ecp_check_privkey( const ecp_group *grp, const mpi *d );
  *                  in order to ease use with other structures such as
  *                  ecdh_context of ecdsa_context.
  */
-int ecp_gen_keypair( const ecp_group *grp, mpi *d, ecp_point *Q,
+int ecp_gen_keypair( ecp_group *grp, mpi *d, ecp_point *Q,
                      int (*f_rng)(void *, unsigned char *, size_t),
                      void *p_rng );
 
diff --git a/library/ecdh.c b/library/ecdh.c
index 050f99dfcb..400e45fa2e 100644
--- a/library/ecdh.c
+++ b/library/ecdh.c
@@ -39,7 +39,7 @@
 /*
  * Generate public key: simple wrapper around ecp_gen_keypair
  */
-int ecdh_gen_public( const ecp_group *grp, mpi *d, ecp_point *Q,
+int ecdh_gen_public( ecp_group *grp, mpi *d, ecp_point *Q,
                      int (*f_rng)(void *, unsigned char *, size_t),
                      void *p_rng )
 {
@@ -49,7 +49,7 @@ int ecdh_gen_public( const ecp_group *grp, mpi *d, ecp_point *Q,
 /*
  * Compute shared secret (SEC1 3.3.1)
  */
-int ecdh_compute_shared( const ecp_group *grp, mpi *z,
+int ecdh_compute_shared( ecp_group *grp, mpi *z,
                          const ecp_point *Q, const mpi *d,
                          int (*f_rng)(void *, unsigned char *, size_t),
                          void *p_rng )
diff --git a/library/ecdsa.c b/library/ecdsa.c
index 8d0b2b5902..91777ec1d3 100644
--- a/library/ecdsa.c
+++ b/library/ecdsa.c
@@ -51,7 +51,7 @@ static int derive_mpi( const ecp_group *grp, mpi *x,
  * Compute ECDSA signature of a hashed message (SEC1 4.1.3)
  * Obviously, compared to SEC1 4.1.3, we skip step 4 (hash message)
  */
-int ecdsa_sign( const ecp_group *grp, mpi *r, mpi *s,
+int ecdsa_sign( ecp_group *grp, mpi *r, mpi *s,
                 const mpi *d, const unsigned char *buf, size_t blen,
                 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
 {
@@ -117,7 +117,7 @@ cleanup:
  * Verify ECDSA signature of hashed message (SEC1 4.1.4)
  * Obviously, compared to SEC1 4.1.3, we skip step 2 (hash message)
  */
-int ecdsa_verify( const ecp_group *grp,
+int ecdsa_verify( ecp_group *grp,
                   const unsigned char *buf, size_t blen,
                   const ecp_point *Q, const mpi *r, const mpi *s)
 {
diff --git a/library/ecp.c b/library/ecp.c
index 48ce5d3745..0b6650d43c 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -149,6 +149,8 @@ void ecp_point_free( ecp_point *pt )
  */
 void ecp_group_free( ecp_group *grp )
 {
+    size_t i;
+
     if( grp == NULL )
         return;
 
@@ -157,6 +159,13 @@ void ecp_group_free( ecp_group *grp )
     ecp_point_free( &grp->G );
     mpi_free( &grp->N );
 
+    if( grp->T != NULL )
+    {
+        for( i = 0; i < grp->T_size; i++ )
+            ecp_point_free( &grp->T[i] );
+        polarssl_free( grp->T );
+    }
+
     memset( grp, 0, sizeof( ecp_group ) );
 }
 
@@ -1279,34 +1288,53 @@ cleanup:
  * This function executes a fixed number of operations for
  * random m in the range 0 .. 2^nbits - 1.
  *
- * As an additional countermeasure against potential elaborate timing attacks,
- * we randomize coordinates after each addition. This was suggested as a
+ * As an additional countermeasure against potential timing attacks,
+ * we randomize coordinates before each addition. This was suggested as a
  * countermeasure against DPA in 5.3 of [2] (with the obvious adaptation that
  * we use jacobian coordinates, not standard projective coordinates).
  */
-int ecp_mul( const ecp_group *grp, ecp_point *R,
+int ecp_mul( ecp_group *grp, ecp_point *R,
              const mpi *m, const ecp_point *P,
              int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
 {
     int ret;
-    unsigned char w, m_is_odd;
+    unsigned char w, m_is_odd, p_eq_g;
     size_t pre_len, naf_len, i, j;
     signed char naf[ MAX_NAF_LEN ];
-    ecp_point Q, T[ MAX_PRE_LEN ];
+    ecp_point Q, *T = NULL, S[2];
     mpi M;
 
     if( mpi_cmp_int( m, 0 ) < 0 || mpi_msb( m ) > grp->nbits )
         return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
 
-    w = grp->nbits >= 521 ? 6 :
-        grp->nbits >= 224 ? 5 :
-                            4;
+    mpi_init( &M );
+    ecp_point_init( &Q );
+    ecp_point_init( &S[0] );
+    ecp_point_init( &S[1] );
+
+    /*
+     * Check if P == G
+     */
+    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
+               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
+               mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
+
+    /*
+     * If P == G, pre-compute a lot of points: this will be re-used later,
+     * otherwise, choose window size depending on curve size
+     */
+    if( p_eq_g )
+        w = POLARSSL_ECP_WINDOW_SIZE;
+    else
+        w = grp->nbits >= 512 ? 6 :
+            grp->nbits >= 224 ? 5 :
+                                4;
 
     /*
      * Make sure w is within the limits.
      * The last test ensures that none of the precomputed points is zero,
      * which wouldn't be handled correctly by ecp_normalize_many().
-     * It is only useful for very small curves, as used in the test suite.
+     * It is only useful for very small curves as used in the test suite.
      */
     if( w > POLARSSL_ECP_WINDOW_SIZE )
         w = POLARSSL_ECP_WINDOW_SIZE;
@@ -1316,25 +1344,54 @@ int ecp_mul( const ecp_group *grp, ecp_point *R,
     pre_len = 1 << ( w - 1 );
     naf_len = grp->nbits / w + 1;
 
-    mpi_init( &M );
-    ecp_point_init( &Q );
-    for( i = 0; i < pre_len; i++ )
-        ecp_point_init( &T[i] );
+    /*
+     * Prepare precomputed points: if P == G we want to
+     * use grp->T if already initialized, or initialize it.
+     */
+    if( ! p_eq_g || grp->T == NULL )
+    {
+        if( ( T = polarssl_malloc( pre_len * sizeof( ecp_point ) ) ) == NULL )
+        {
+            ret = POLARSSL_ERR_ECP_MALLOC_FAILED;
+            goto cleanup;
+        }
 
-    m_is_odd = ( mpi_get_bit( m, 0 ) == 1 );
+        for( i = 0; i < pre_len; i++ )
+            ecp_point_init( &T[i] );
+
+        MPI_CHK( ecp_precompute( grp, T, pre_len, P ) );
+
+        if( p_eq_g )
+        {
+            grp->T = T;
+            grp->T_size = pre_len;
+        }
+    }
+    else
+    {
+        T = grp->T;
+
+        /* Should never happen, but we want to be extra sure */
+        if( pre_len != grp->T_size )
+        {
+            ret = POLARSSL_ERR_ECP_BAD_INPUT_DATA;
+            goto cleanup;
+        }
+    }
 
     /*
-     * Make sure M is odd:
-     * later we'll get m * P by subtracting * P or 2 * P to M * P.
+     * Make sure M is odd (M = m + 1 or M = m + 2):
+     * later we'll get m * P by subtracting P or 2 * P from M * P.
      */
+    m_is_odd = ( mpi_get_bit( m, 0 ) == 1 );
+
     MPI_CHK( mpi_copy( &M, m ) );
     MPI_CHK( mpi_add_int( &M, &M, 1 + m_is_odd ) );
 
     /*
-     * Compute the fixed-pattern NAF and precompute odd multiples
+     * Compute the fixed-pattern NAF of M
      */
     MPI_CHK( ecp_w_naf_fixed( naf, naf_len, w, &M ) );
-    MPI_CHK( ecp_precompute( grp, T, pre_len, P ) );
 
     /*
      * Compute M * P, using a variant of left-to-right 2^w-ary multiplication:
@@ -1348,6 +1405,10 @@ int ecp_mul( const ecp_group *grp, ecp_point *R,
     i = naf_len - 1;
     while( 1 )
     {
+        /* Countermeasure (see comments above) */
+        if( f_rng != NULL )
+            ecp_randomize_coordinates( grp, &Q, f_rng, p_rng );
+
         if( naf[i] < 0 )
         {
             MPI_CHK( ecp_add_mixed( grp, &Q, &Q, &T[ - naf[i] - 1 ], -1 ) );
@@ -1357,10 +1418,6 @@ int ecp_mul( const ecp_group *grp, ecp_point *R,
             MPI_CHK( ecp_add_mixed( grp, &Q, &Q, &T[ naf[i] ], +1 ) );
         }
 
-        /* Countermeasure (see comments above) */
-        if( f_rng != NULL )
-            ecp_randomize_coordinates( grp, &Q, f_rng, p_rng );
-
         if( i == 0 )
             break;
         i--;
@@ -1372,20 +1429,26 @@ int ecp_mul( const ecp_group *grp, ecp_point *R,
     }
 
     /*
-     * Now get m * P from M * P.
-     * Since we don't need T[] any more, we can recycle it:
-     * we already have T[0] = P, now set T[1] = 2 * P.
+     * Now get m * P from M * P
      */
-    MPI_CHK( ecp_add( grp, &T[1], P, P ) );
-    MPI_CHK( ecp_sub( grp, R, &Q, &T[m_is_odd] ) );
+    MPI_CHK( ecp_copy( &S[0], P ) );
+    MPI_CHK( ecp_add( grp, &S[1], P, P ) );
+    MPI_CHK( ecp_sub( grp, R, &Q, &S[m_is_odd] ) );
 
 
 cleanup:
 
-    mpi_free( &M );
+    if( T != NULL && T != grp->T )  /* free T unless grp now owns it */
+    {
+        for( i = 0; i < pre_len; i++ )
+            ecp_point_free( &T[i] );
+        polarssl_free( T );
+    }
+
+    ecp_point_free( &S[1] );
+    ecp_point_free( &S[0] );
     ecp_point_free( &Q );
-    for( i = 0; i < pre_len; i++ )
-        ecp_point_free( &T[i] );
+    mpi_free( &M );
 
     return( ret );
 }
@@ -1450,7 +1513,7 @@ int ecp_check_privkey( const ecp_group *grp, const mpi *d )
 /*
  * Generate a keypair (SEC1 3.2.1)
  */
-int ecp_gen_keypair( const ecp_group *grp, mpi *d, ecp_point *Q,
+int ecp_gen_keypair( ecp_group *grp, mpi *d, ecp_point *Q,
                      int (*f_rng)(void *, unsigned char *, size_t),
                      void *p_rng )
 {
@@ -1485,7 +1548,7 @@ int ecp_self_test( int verbose )
     int ret;
     size_t i;
     ecp_group grp;
-    ecp_point R;
+    ecp_point R, P;
     mpi m;
     unsigned long add_c_prev, dbl_c_prev;
     const char *exponents[] =
@@ -1501,6 +1564,7 @@ int ecp_self_test( int verbose )
 
     ecp_group_init( &grp );
     ecp_point_init( &R );
+    ecp_point_init( &P );
     mpi_init( &m );
 
 #if defined(POLARSSL_ECP_DP_SECP192R1_ENABLED)
@@ -1526,7 +1590,11 @@ int ecp_self_test( int verbose )
 #endif /* POLARSSL_ECP_DP_SECP192R1_ENABLED */
 
     if( verbose != 0 )
-        printf( "  ECP test #1 (resistance to simple timing attacks): " );
+        printf( "  ECP test #1 (constant op_count, base point G): " );
+
+    /* Do a dummy multiplication first to trigger precomputation */
+    MPI_CHK( mpi_lset( &m, 2 ) );
+    MPI_CHK( ecp_mul( &grp, &P, &m, &grp.G, NULL, NULL ) );
 
     add_count = 0;
     dbl_count = 0;
@@ -1556,6 +1624,38 @@ int ecp_self_test( int verbose )
     if( verbose != 0 )
         printf( "passed\n" );
 
+    if( verbose != 0 )
+        printf( "  ECP test #2 (constant op_count, other point): " );
+    /* We computed P = 2G last time, use it */
+
+    add_count = 0;
+    dbl_count = 0;
+    MPI_CHK( mpi_read_string( &m, 16, exponents[0] ) );
+    MPI_CHK( ecp_mul( &grp, &R, &m, &P, NULL, NULL ) );
+
+    for( i = 1; i < sizeof( exponents ) / sizeof( exponents[0] ); i++ )
+    {
+        add_c_prev = add_count;
+        dbl_c_prev = dbl_count;
+        add_count = 0;
+        dbl_count = 0;
+
+        MPI_CHK( mpi_read_string( &m, 16, exponents[i] ) );
+        MPI_CHK( ecp_mul( &grp, &R, &m, &P, NULL, NULL ) );
+
+        if( add_count != add_c_prev || dbl_count != dbl_c_prev )
+        {
+            if( verbose != 0 )
+                printf( "failed (%zu)\n", i );
+
+            ret = 1;
+            goto cleanup;
+        }
+    }
+
+    if( verbose != 0 )
+        printf( "passed\n" );
+
 cleanup:
 
     if( ret < 0 && verbose != 0 )
@@ -1563,6 +1663,7 @@ cleanup:
 
     ecp_group_free( &grp );
     ecp_point_free( &R );
+    ecp_point_free( &P );
     mpi_free( &m );
 
     if( verbose != 0 )