diff --git a/ChangeLog.d/mbedtls_sha256_a64_crypto_acceleration.txt b/ChangeLog.d/mbedtls_sha256_a64_crypto_acceleration.txt
new file mode 100644
index 0000000000..865b3372ce
--- /dev/null
+++ b/ChangeLog.d/mbedtls_sha256_a64_crypto_acceleration.txt
@@ -0,0 +1,2 @@
+Features
+   * A64 SHA-2 crypto extension support for SHA-256
diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index d7cc7bcd64..a645819881 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -592,6 +592,28 @@
 #error "MBEDTLS_SHA256_C defined without MBEDTLS_SHA224_C"
 #endif
 
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) && \
+    defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+#error "Must only define one of MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#endif
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
+    defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+#if !defined(MBEDTLS_SHA256_C)
+#error "MBEDTLS_SHA256_USE_A64_CRYPTO_* defined without MBEDTLS_SHA256_C"
+#endif
+#if defined(MBEDTLS_SHA256_ALT) || defined(MBEDTLS_SHA256_PROCESS_ALT)
+#error "MBEDTLS_SHA256_*ALT can't be used with MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#endif
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO)
+#error "Must use minimum -march=armv8-a+crypto for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#endif
+#endif
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY) && !defined(__aarch64__)
+#error "MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY defined on non-AArch64 system"
+#endif
+
 #if defined(MBEDTLS_SSL_PROTO_TLS1_2) && ( !defined(MBEDTLS_SHA1_C) && \
     !defined(MBEDTLS_SHA256_C) && !defined(MBEDTLS_SHA512_C) )
 #error "MBEDTLS_SSL_PROTO_TLS1_2 defined, but not all prerequisites"
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index a935c80fd8..1c631b5267 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -2759,6 +2759,56 @@
  */
 #define MBEDTLS_SHA256_C
 
+/**
+ * \def MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
+ *
+ * Enable acceleration of the SHA-256 cryptographic hash algorithm with the
+ * Arm A64 cryptographic extensions if they are available at runtime. If they
+ * are not, the library will fall back to the C implementation.
+ *
+ * \note If MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT is defined when building
+ * for a non-AArch64 target, it will be silently ignored.
+ *
+ * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
+ * of \c -march=armv8-a+crypto.
+ *
+ * \warning MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
+ * same time as MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY.
+ *
+ * Requires: MBEDTLS_SHA256_C.
+ *
+ * Module:  library/sha256.c
+ *
+ * Uncomment to have the library check for the A64 SHA-256 crypto extensions
+ * and use them if available.
+ */
+//#define MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
+
+/**
+ * \def MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY
+ *
+ * Enable acceleration of the SHA-256 cryptographic hash algorithm with the
+ * Arm A64 cryptographic extensions, which must be available at runtime (or
+ * an illegal instruction fault will occur).
+ *
+ * \note This allows builds with a smaller code size than with
+ * MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT.
+ *
+ * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
+ * of \c -march=armv8-a+crypto.
+ *
+ * \warning MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY cannot be defined at the same
+ * time as MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT.
+ *
+ * Requires: MBEDTLS_SHA256_C.
+ *
+ * Module:  library/sha256.c
+ *
+ * Uncomment to have the library use the A64 SHA-256 crypto extensions
+ * unconditionally.
+ */
+//#define MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY
+
 /**
  * \def MBEDTLS_SHA384_C
  *
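Neither option changes the public SHA-256 API: existing callers pick up the
acceleration (or the C fallback) transparently. As a rough illustration, not
part of this patch, a typical one-shot caller of the existing mbedtls_sha256()
convenience function looks like this:

    #include <stdio.h>
    #include "mbedtls/sha256.h"

    int main( void )
    {
        const unsigned char msg[] = "abc";
        unsigned char digest[32];

        /* The same call is made whether or not the A64 crypto extensions are
         * compiled in or detected; dispatch happens inside the library. */
        if( mbedtls_sha256( msg, sizeof( msg ) - 1, digest, 0 /* SHA-256 */ ) != 0 )
            return( 1 );

        for( size_t i = 0; i < sizeof( digest ); i++ )
            printf( "%02x", digest[i] );
        printf( "\n" );

        return( 0 );
    }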
diff --git a/library/sha256.c b/library/sha256.c
index c3573f85fd..0db5f4d79d 100644
--- a/library/sha256.c
+++ b/library/sha256.c
@@ -44,12 +44,97 @@
 #endif /* MBEDTLS_PLATFORM_C */
 #endif /* MBEDTLS_SELF_TEST */
 
+#if defined(__aarch64__)
+#  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
+      defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+#    include <arm_neon.h>
+#  endif
+#  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) && defined(__linux__)
+#    include <sys/auxv.h>
+#  endif
+#else
+#  undef MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY
+#  undef MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
+#endif
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
+/*
+ * Capability detection code comes early, so we can disable
+ * MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT if no detection mechanism is found
+ */
+#if defined(HWCAP_SHA2)
+static int mbedtls_a64_crypto_sha256_check_support( void )
+{
+    return( ( getauxval( AT_HWCAP ) & HWCAP_SHA2 ) ? 1 : 0 );
+}
+#elif defined(__APPLE__)
+static int mbedtls_a64_crypto_sha256_check_support( void )
+{
+    return( 1 );
+}
+#elif defined(__unix__) && defined(SIG_SETMASK)
+/* Detection with SIGILL, setjmp() and longjmp() */
+#include <signal.h>
+#include <setjmp.h>
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+static jmp_buf return_from_sigill;
+
+/*
+ * A64 SHA-256 support detection via SIGILL
+ */
+static void sigill_handler( int signal )
+{
+    (void) signal;
+    longjmp( return_from_sigill, 1 );
+}
+
+static int mbedtls_a64_crypto_sha256_check_support( void )
+{
+    struct sigaction old_action, new_action;
+
+    sigset_t old_mask;
+    if( sigprocmask( 0, NULL, &old_mask ) )
+        return( 0 );
+
+    sigemptyset( &new_action.sa_mask );
+    new_action.sa_flags = 0;
+    new_action.sa_handler = sigill_handler;
+
+    sigaction( SIGILL, &new_action, &old_action );
+
+    static int ret = 0;
+
+    if( setjmp( return_from_sigill ) == 0 )         /* First return only */
+    {
+        /* If this traps, we will return a second time from setjmp() with 1 */
+        asm( "sha256h q0, q0, v0.4s" : : : "v0" );
+        ret = 1;
+    }
+
+    sigaction( SIGILL, &old_action, NULL );
+    sigprocmask( SIG_SETMASK, &old_mask, NULL );
+
+    return( ret );
+}
+#else
+#warning "No mechanism to detect A64_CRYPTO found, using C code only"
+#undef MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
+#endif  /* HWCAP_SHA2, __APPLE__, __unix__ && SIG_SETMASK */
+
+#endif  /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT */
+
 #define SHA256_VALIDATE_RET(cond)                           \
     MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_SHA256_BAD_INPUT_DATA )
 #define SHA256_VALIDATE(cond)  MBEDTLS_INTERNAL_VALIDATE( cond )
 
 #if !defined(MBEDTLS_SHA256_ALT)
 
+#define SHA256_BLOCK_SIZE 64
+
 void mbedtls_sha256_init( mbedtls_sha256_context *ctx )
 {
     SHA256_VALIDATE( ctx != NULL );
@@ -143,6 +228,132 @@ static const uint32_t K[] =
     0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
 };
 
+#endif
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
+    defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+# define mbedtls_internal_sha256_process_many_a64_crypto mbedtls_internal_sha256_process_many
+# define mbedtls_internal_sha256_process_a64_crypto      mbedtls_internal_sha256_process
+#endif
+
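For reference, the HWCAP_SHA2 branch above queries the kernel's auxiliary
vector via getauxval(). A standalone sketch of the same check, illustrative
only and not part of this patch, assuming a Linux/AArch64 target where
<asm/hwcap.h> provides HWCAP_SHA2:

    #include <stdio.h>
    #include <sys/auxv.h>     /* getauxval(), AT_HWCAP */
    #include <asm/hwcap.h>    /* HWCAP_SHA2 */

    int main( void )
    {
        unsigned long hwcaps = getauxval( AT_HWCAP );

        printf( "SHA-2 instructions: %s\n",
                ( hwcaps & HWCAP_SHA2 ) ? "present" : "absent" );
        return( 0 );
    }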
+static size_t mbedtls_internal_sha256_process_many_a64_crypto(
+                  mbedtls_sha256_context *ctx, const uint8_t *msg, size_t len )
+{
+    uint32x4_t abcd = vld1q_u32( &ctx->state[0] );
+    uint32x4_t efgh = vld1q_u32( &ctx->state[4] );
+
+    size_t processed = 0;
+
+    for( ;
+         len >= SHA256_BLOCK_SIZE;
+         processed += SHA256_BLOCK_SIZE,
+               msg += SHA256_BLOCK_SIZE,
+               len -= SHA256_BLOCK_SIZE )
+    {
+        uint32x4_t tmp, abcd_prev;
+
+        uint32x4_t abcd_orig = abcd;
+        uint32x4_t efgh_orig = efgh;
+
+        uint32x4_t sched0 = vld1q_u32( (const uint32_t *)( msg + 16 * 0 ) );
+        uint32x4_t sched1 = vld1q_u32( (const uint32_t *)( msg + 16 * 1 ) );
+        uint32x4_t sched2 = vld1q_u32( (const uint32_t *)( msg + 16 * 2 ) );
+        uint32x4_t sched3 = vld1q_u32( (const uint32_t *)( msg + 16 * 3 ) );
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  /* Will be true if not defined */
+        /* Untested on BE */
+        sched0 = vreinterpretq_u32_u8( vrev32q_u8( vreinterpretq_u8_u32( sched0 ) ) );
+        sched1 = vreinterpretq_u32_u8( vrev32q_u8( vreinterpretq_u8_u32( sched1 ) ) );
+        sched2 = vreinterpretq_u32_u8( vrev32q_u8( vreinterpretq_u8_u32( sched2 ) ) );
+        sched3 = vreinterpretq_u32_u8( vrev32q_u8( vreinterpretq_u8_u32( sched3 ) ) );
+#endif
+
+        /* Rounds 0 to 3 */
+        tmp = vaddq_u32( sched0, vld1q_u32( &K[0] ) );
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+        efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+        /* Rounds 4 to 7 */
+        tmp = vaddq_u32( sched1, vld1q_u32( &K[4] ) );
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+        efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+        /* Rounds 8 to 11 */
+        tmp = vaddq_u32( sched2, vld1q_u32( &K[8] ) );
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+        efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+        /* Rounds 12 to 15 */
+        tmp = vaddq_u32( sched3, vld1q_u32( &K[12] ) );
+        abcd_prev = abcd;
+        abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+        efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+        for( int t = 16; t < 64; t += 16 )
+        {
+            /* Rounds t to t + 3 */
+            sched0 = vsha256su1q_u32( vsha256su0q_u32( sched0, sched1 ), sched2, sched3 );
+            tmp = vaddq_u32( sched0, vld1q_u32( &K[t] ) );
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+            efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+            /* Rounds t + 4 to t + 7 */
+            sched1 = vsha256su1q_u32( vsha256su0q_u32( sched1, sched2 ), sched3, sched0 );
+            tmp = vaddq_u32( sched1, vld1q_u32( &K[t + 4] ) );
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+            efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+            /* Rounds t + 8 to t + 11 */
+            sched2 = vsha256su1q_u32( vsha256su0q_u32( sched2, sched3 ), sched0, sched1 );
+            tmp = vaddq_u32( sched2, vld1q_u32( &K[t + 8] ) );
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+            efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+
+            /* Rounds t + 12 to t + 15 */
+            sched3 = vsha256su1q_u32( vsha256su0q_u32( sched3, sched0 ), sched1, sched2 );
+            tmp = vaddq_u32( sched3, vld1q_u32( &K[t + 12] ) );
+            abcd_prev = abcd;
+            abcd = vsha256hq_u32( abcd_prev, efgh, tmp );
+            efgh = vsha256h2q_u32( efgh, abcd_prev, tmp );
+        }
+
+        abcd = vaddq_u32( abcd, abcd_orig );
+        efgh = vaddq_u32( efgh, efgh_orig );
+    }
+
+    vst1q_u32( &ctx->state[0], abcd );
+    vst1q_u32( &ctx->state[4], efgh );
+
+    return( processed );
+}
+
+int mbedtls_internal_sha256_process_a64_crypto( mbedtls_sha256_context *ctx,
+        const unsigned char data[SHA256_BLOCK_SIZE] )
+{
+    return( ( mbedtls_internal_sha256_process_many_a64_crypto( ctx, data,
+                  SHA256_BLOCK_SIZE ) == SHA256_BLOCK_SIZE ) ? 0 : -1 );
+}
+
+#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
+
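For readers decoding the intrinsics: each vsha256su0q_u32()/vsha256su1q_u32()
pair in the loop above expands four words of the message schedule at once. A
scalar sketch of the equivalent FIPS 180-4 recurrence, for reference only and
not part of this patch:

    #include <stdint.h>

    static uint32_t rotr32( uint32_t x, unsigned n )
    {
        return( ( x >> n ) | ( x << ( 32 - n ) ) );
    }

    /* W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16];
     * one su0/su1 pair produces W[t..t+3] for t = 16, 20, ..., 60. */
    static void sha256_schedule_4( uint32_t W[64], int t )
    {
        for( int i = t; i < t + 4; i++ )
        {
            uint32_t s0 = rotr32( W[i - 15],  7 ) ^ rotr32( W[i - 15], 18 )
                        ^ ( W[i - 15] >>  3 );
            uint32_t s1 = rotr32( W[i -  2], 17 ) ^ rotr32( W[i -  2], 19 )
                        ^ ( W[i -  2] >> 10 );

            W[i] = W[i - 16] + s0 + W[i - 7] + s1;
        }
    }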
+
+#if !defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
+#define mbedtls_internal_sha256_process_many_c mbedtls_internal_sha256_process_many
+#define mbedtls_internal_sha256_process_c      mbedtls_internal_sha256_process
+#endif
+
+
+#if !defined(MBEDTLS_SHA256_PROCESS_ALT) && \
+    !defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+
 #define  SHR(x,n) (((x) & 0xFFFFFFFF) >> (n))
 #define ROTR(x,n) (SHR(x,n) | ((x) << (32 - (n))))
 
@@ -169,8 +380,8 @@ static const uint32_t K[] =
         (d) += local.temp1; (h) = local.temp1 + local.temp2;        \
     } while( 0 )
 
-int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
-                                     const unsigned char data[64] )
+int mbedtls_internal_sha256_process_c( mbedtls_sha256_context *ctx,
+        const unsigned char data[SHA256_BLOCK_SIZE] )
 {
     struct
     {
@@ -257,7 +468,69 @@ int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
     return( 0 );
 }
 
-#endif /* !MBEDTLS_SHA256_PROCESS_ALT */
+#endif /* !MBEDTLS_SHA256_PROCESS_ALT && !MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
+
+
+#if !defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+
+static size_t mbedtls_internal_sha256_process_many_c(
+                  mbedtls_sha256_context *ctx, const uint8_t *data, size_t len )
+{
+    size_t processed = 0;
+
+    while( len >= SHA256_BLOCK_SIZE )
+    {
+        if( mbedtls_internal_sha256_process_c( ctx, data ) != 0 )
+            return( 0 );
+
+        data += SHA256_BLOCK_SIZE;
+        len  -= SHA256_BLOCK_SIZE;
+
+        processed += SHA256_BLOCK_SIZE;
+    }
+
+    return( processed );
+}
+
+#endif /* !MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
+
+
+#if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
+
+static int mbedtls_a64_crypto_sha256_has_support( void )
+{
+    static int done = 0;
+    static int supported = 0;
+
+    if( !done )
+    {
+        supported = mbedtls_a64_crypto_sha256_check_support();
+        done = 1;
+    }
+
+    return( supported );
+}
+
+static size_t mbedtls_internal_sha256_process_many( mbedtls_sha256_context *ctx,
+                  const uint8_t *msg, size_t len )
+{
+    if( mbedtls_a64_crypto_sha256_has_support() )
+        return( mbedtls_internal_sha256_process_many_a64_crypto( ctx, msg, len ) );
+    else
+        return( mbedtls_internal_sha256_process_many_c( ctx, msg, len ) );
+}
+
+int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
+        const unsigned char data[SHA256_BLOCK_SIZE] )
+{
+    if( mbedtls_a64_crypto_sha256_has_support() )
+        return( mbedtls_internal_sha256_process_a64_crypto( ctx, data ) );
+    else
+        return( mbedtls_internal_sha256_process_c( ctx, data ) );
+}
+
+#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT */
+
 
 /*
  * SHA-256 process buffer
@@ -277,7 +550,7 @@ int mbedtls_sha256_update( mbedtls_sha256_context *ctx,
         return( 0 );
 
     left = ctx->total[0] & 0x3F;
-    fill = 64 - left;
+    fill = SHA256_BLOCK_SIZE - left;
 
     ctx->total[0] += (uint32_t) ilen;
     ctx->total[0] &= 0xFFFFFFFF;
@@ -297,13 +570,15 @@ int mbedtls_sha256_update( mbedtls_sha256_context *ctx,
         left = 0;
     }
 
-    while( ilen >= 64 )
+    while( ilen >= SHA256_BLOCK_SIZE )
     {
-        if( ( ret = mbedtls_internal_sha256_process( ctx, input ) ) != 0 )
-            return( ret );
+        size_t processed =
+            mbedtls_internal_sha256_process_many( ctx, input, ilen );
+        if( processed < SHA256_BLOCK_SIZE )
+            return( MBEDTLS_ERR_ERROR_GENERIC_ERROR );
 
-        input += 64;
-        ilen  -= 64;
+        input += processed;
+        ilen  -= processed;
     }
 
     if( ilen > 0 )
@@ -340,7 +615,7 @@ int mbedtls_sha256_finish( mbedtls_sha256_context *ctx,
     else
     {
         /* We'll need an extra block */
-        memset( ctx->buffer + used, 0, 64 - used );
+        memset( ctx->buffer + used, 0, SHA256_BLOCK_SIZE - used );
 
         if( ( ret = mbedtls_internal_sha256_process( ctx, ctx->buffer ) ) != 0 )
             return( ret );
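The streaming interface is also unchanged: mbedtls_sha256_update() now hands
whole runs of blocks to mbedtls_internal_sha256_process_many() in a single
call, so multi-block updates reach the accelerated routine without per-block
dispatch. A sketch of a typical streaming caller, illustrative only and not
part of this patch:

    #include <stddef.h>
    #include "mbedtls/sha256.h"

    int hash_two_parts( const unsigned char *p1, size_t len1,
                        const unsigned char *p2, size_t len2,
                        unsigned char out[32] )
    {
        mbedtls_sha256_context ctx;
        int ret;

        mbedtls_sha256_init( &ctx );

        if( ( ret = mbedtls_sha256_starts( &ctx, 0 ) ) != 0 )   /* 0 = SHA-256 */
            goto exit;
        if( ( ret = mbedtls_sha256_update( &ctx, p1, len1 ) ) != 0 )
            goto exit;
        if( ( ret = mbedtls_sha256_update( &ctx, p2, len2 ) ) != 0 )
            goto exit;

        ret = mbedtls_sha256_finish( &ctx, out );

    exit:
        mbedtls_sha256_free( &ctx );
        return( ret );
    }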
diff --git a/scripts/config.py b/scripts/config.py
index 6d5edc7c0b..0ab1e394f0 100755
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -198,6 +198,7 @@ EXCLUDE_FROM_FULL = frozenset([
     'MBEDTLS_PSA_CRYPTO_SPM', # platform dependency (PSA SPM)
     'MBEDTLS_PSA_INJECT_ENTROPY', # build dependency (hook functions)
     'MBEDTLS_RSA_NO_CRT', # influences the use of RSA in X.509 and TLS
+    'MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY', # interacts with *_USE_A64_CRYPTO_IF_PRESENT
     'MBEDTLS_TEST_CONSTANT_FLOW_MEMSAN', # build dependency (clang+memsan)
     'MBEDTLS_TEST_CONSTANT_FLOW_VALGRIND', # build dependency (valgrind headers)
     'MBEDTLS_X509_REMOVE_INFO', # removes a feature
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index bd38cdb149..458650bfaf 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -1498,6 +1498,9 @@ component_build_module_alt () {
     # The SpecifiedECDomain parsing code accesses mbedtls_ecp_group fields
     # directly and assumes the implementation works with partial groups.
     scripts/config.py unset MBEDTLS_PK_PARSE_EC_EXTENDED
+    # MBEDTLS_SHA256_*ALT can't be used with MBEDTLS_SHA256_USE_A64_CRYPTO_*
+    scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
+    scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY
     # Enable all MBEDTLS_XXX_ALT for whole modules. Do not enable
     # MBEDTLS_XXX_YYY_ALT which are for single functions.
     scripts/config.py set-all 'MBEDTLS_([A-Z0-9]*|NIST_KW)_ALT'
@@ -2702,7 +2705,7 @@ component_build_armcc () {
     armc6_build_test "--target=arm-arm-none-eabi -march=armv8-m.main"
 
     # ARM Compiler 6 - Target ARMv8-A - AArch64
-    armc6_build_test "--target=aarch64-arm-none-eabi -march=armv8.2-a"
+    armc6_build_test "--target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
 }
 
 component_test_tls13 () {