diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index ac374d2a4b..654a845f45 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -712,41 +712,6 @@
 #if defined(MBEDTLS_SHA512_ALT) || defined(MBEDTLS_SHA512_PROCESS_ALT)
 #error "MBEDTLS_SHA512_*ALT can't be used with MBEDTLS_SHA512_USE_A64_CRYPTO_*"
 #endif
-/*
- * Best performance comes from most recent compilers, with intrinsics and -O3.
- * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and
- * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12).
- *
- * GCC < 8 won't work at all (lacks the sha512 instructions)
- * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512
- *
- * Clang < 7 won't work at all (lacks the sha512 instructions)
- * Clang 7-12 don't have intrinsics (but we work around that with inline
- *            assembler) or __ARM_FEATURE_SHA512
- * Clang == 13.0.0 same as clang 12 (only seen on macOS)
- * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics
- */
-#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512)
-   /* Test Clang first, as it defines __GNUC__ */
-#  if defined(__clang__)
-#    if __clang_major__ < 7
-#      error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-#    elif __clang_major__ < 13 || \
-         (__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0)
-       /* We implement the intrinsics with inline assembler, so don't error */
-#    else
-#      error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-#    endif
-#  elif defined(__GNUC__)
-#    if __GNUC__ < 8
-#      error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-#    else
-#      error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-#    endif
-#  else
-#    error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
-#  endif
-#endif
 
 #endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
 
@@ -767,9 +732,7 @@
 #if defined(MBEDTLS_SHA256_ALT) || defined(MBEDTLS_SHA256_PROCESS_ALT)
 #error "MBEDTLS_SHA256_*ALT can't be used with MBEDTLS_SHA256_USE_A64_CRYPTO_*"
 #endif
-#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO)
-#error "Must use minimum -march=armv8-a+crypto for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
-#endif
+
 #endif
 
 #if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY) && \
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 5aff9c5b60..1995e54b2a 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -3115,9 +3115,6 @@
  * \note If MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT is defined when building
  * for a non-Aarch64 build it will be silently ignored.
  *
- * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
- * of \c -march=armv8-a+crypto.
- *
  * \warning MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
  * same time as MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY.
  *
@@ -3140,9 +3137,6 @@
  * \note This allows builds with a smaller code size than with
  * MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
  *
- * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum
- * of \c -march=armv8-a+crypto.
- *
  * \warning MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY cannot be defined at the same
  * time as MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT.
  *
@@ -3197,9 +3191,7 @@
  * for a non-Aarch64 build it will be silently ignored.
  *
  * \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
- * Clang >= 7, and \c CFLAGS must be set to a minimum of
- * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
- * fastest code.
+ * Clang >= 7.
  *
  * \warning MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
  * same time as MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY.
@@ -3224,9 +3216,7 @@
  * MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
  *
  * \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
- * Clang >= 7, and \c CFLAGS must be set to a minimum of
- * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
- * fastest code.
+ * Clang >= 7.
  *
  * \warning MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY cannot be defined at the same
  * time as MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT.
diff --git a/library/sha256.c b/library/sha256.c
index cb09a71ec1..23cd406c37 100644
--- a/library/sha256.c
+++ b/library/sha256.c
@@ -22,6 +22,23 @@
  *  http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
  */
 
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && \
+    defined(__clang__) &&  __clang_major__ < 18 && __clang_major__ > 3
+/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
+ *
+ * The intrinsic declaration are guarded by predefined ACLE macros in clang:
+ * these are normally only enabled by the -march option on the command line.
+ * By defining the macros ourselves we gain access to those declarations without
+ * requiring -march on the command line.
+ *
+ * `arm_neon.h` could be included by any header file, so we put these defines
+ * at the top of this file, before any includes.
+ */
+#define __ARM_FEATURE_CRYPTO 1
+#define NEED_TARGET_OPTIONS
+#endif /* __aarch64__ && __clang__ &&
+          !__ARM_FEATURE_CRYPTO && __clang_major__ < 18 && __clang_major__ > 3 */
+
 #include "common.h"
 
 #if defined(MBEDTLS_SHA256_C) || defined(MBEDTLS_SHA224_C)
@@ -37,6 +54,30 @@
 #if defined(__aarch64__)
 #  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
     defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
+/* *INDENT-OFF* */
+#    if !defined(__ARM_FEATURE_CRYPTO) || defined(NEED_TARGET_OPTIONS)
+#      if defined(__clang__)
+#        if __clang_major__ < 4
+#          error "A more recent Clang is required for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#        endif
+#        pragma clang attribute push (__attribute__((target("crypto"))), apply_to=function)
+#        define MBEDTLS_POP_TARGET_PRAGMA
+#      elif defined(__GNUC__)
+         /* FIXME: GCC-5 annouce crypto extension, but some intrinsic are missed.
+          *        Known miss intrinsic can be workaround.
+          */
+#        if __GNUC__ < 6
+#          error "A more recent GCC is required for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#        else
+#          pragma GCC push_options
+#          pragma GCC target ("arch=armv8-a+crypto")
+#          define MBEDTLS_POP_TARGET_PRAGMA
+#        endif
+#      else
+#        error "Only GCC and Clang supported for MBEDTLS_SHA256_USE_A64_CRYPTO_*"
+#      endif
+#    endif
+/* *INDENT-ON* */
 #    include <arm_neon.h>
 #  endif
 #  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
@@ -353,8 +394,16 @@ int mbedtls_internal_sha256_process_a64_crypto(mbedtls_sha256_context *ctx,
             SHA256_BLOCK_SIZE) ? 0 : -1;
 }
 
-#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
+#if defined(MBEDTLS_POP_TARGET_PRAGMA)
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#undef MBEDTLS_POP_TARGET_PRAGMA
+#endif
 
+#endif /* MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY */
 
 #if !defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
 #define mbedtls_internal_sha256_process_many_c mbedtls_internal_sha256_process_many
diff --git a/library/sha512.c b/library/sha512.c
index efcbed413f..bc92a8de21 100644
--- a/library/sha512.c
+++ b/library/sha512.c
@@ -22,6 +22,26 @@
  *  http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
  */
 
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512) && \
+    defined(__clang__) &&  __clang_major__ < 18 && \
+    __clang_major__ >= 13 && __clang_minor__ > 0 && __clang_patchlevel__ > 0
+/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
+ *
+ * The intrinsic declaration are guarded by predefined ACLE macros in clang:
+ * these are normally only enabled by the -march option on the command line.
+ * By defining the macros ourselves we gain access to those declarations without
+ * requiring -march on the command line.
+ *
+ * `arm_neon.h` could be included by any header file, so we put these defines
+ * at the top of this file, before any includes.
+ */
+#define __ARM_FEATURE_SHA512 1
+#define NEED_TARGET_OPTIONS
+#endif /* __aarch64__ && __clang__ &&
+          !__ARM_FEATURE_SHA512 && __clang_major__ < 18 &&
+          __clang_major__ >= 13 && __clang_minor__ > 0 &&
+          __clang_patchlevel__ > 0 */
+
 #include "common.h"
 
 #if defined(MBEDTLS_SHA512_C) || defined(MBEDTLS_SHA384_C)
@@ -43,6 +63,47 @@
 #if defined(__aarch64__)
 #  if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
     defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+/* *INDENT-OFF* */
+/*
+ * Best performance comes from most recent compilers, with intrinsics and -O3.
+ * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and
+ * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12).
+ *
+ * GCC < 8 won't work at all (lacks the sha512 instructions)
+ * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512
+ *
+ * Clang < 7 won't work at all (lacks the sha512 instructions)
+ * Clang 7-12 don't have intrinsics (but we work around that with inline
+ *            assembler) or __ARM_FEATURE_SHA512
+ * Clang == 13.0.0 same as clang 12 (only seen on macOS)
+ * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics
+ */
+#    if !defined(__ARM_FEATURE_SHA512) || defined(NEED_TARGET_OPTIONS)
+       /* Test Clang first, as it defines __GNUC__ */
+#      if defined(__clang__)
+#        if __clang_major__ < 7
+#          error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#        elif __clang_major__ < 13 || \
+              (__clang_major__ == 13 && __clang_minor__ == 0 && \
+               __clang_patchlevel__ == 0)
+           /* We implement the intrinsics with inline assembler, so don't error */
+#        else
+#          pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function)
+#          define MBEDTLS_POP_TARGET_PRAGMA
+#        endif
+#      elif defined(__GNUC__)
+#        if __GNUC__ < 8
+#          error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#        else
+#          pragma GCC push_options
+#          pragma GCC target ("arch=armv8.2-a+sha3")
+#          define MBEDTLS_POP_TARGET_PRAGMA
+#        endif
+#      else
+#        error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#      endif
+#    endif
+/* *INDENT-ON* */
 #    include <arm_neon.h>
 #  endif
 #  if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
@@ -516,6 +577,15 @@ int mbedtls_internal_sha512_process_a64_crypto(mbedtls_sha512_context *ctx,
             SHA512_BLOCK_SIZE) ? 0 : -1;
 }
 
+#if defined(MBEDTLS_POP_TARGET_PRAGMA)
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#undef MBEDTLS_POP_TARGET_PRAGMA
+#endif
+
 #endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */