From 9b19f16698f9caff4984a9fee4208b5d0ed1f9e0 Mon Sep 17 00:00:00 2001 From: sguo35 Date: Wed, 13 Jul 2022 10:59:46 -0700 Subject: [PATCH] arm64: add optimized 16byte ld/st for armv8.4a+ 16B ldp/stp are atomic on v8.4a+. See Arm Architecture Reference Manual, "Changes to single-copy atomicity in Armv8.4". Add atomic load/store/release implementations using these instructions and add detection for 8.4a+ capability. --- rpcs3/util/atomic.hpp | 39 +++++++++++++++++++++++++++++++++++++++ rpcs3/util/types.hpp | 7 +++++++ 2 files changed, 46 insertions(+) diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp index 005f73daa4..82559ede48 100644 --- a/rpcs3/util/atomic.hpp +++ b/rpcs3/util/atomic.hpp @@ -1096,8 +1096,21 @@ struct atomic_storage : atomic_storage #endif } #elif defined(ARCH_ARM64) + static inline T load(const T& dest) { +#if defined(ARM_FEATURE_LSE2) + u64 data[2]; + __asm__ volatile("1:\n" + "ldp %x[data0], %x[data1], %[dest]\n" + "dmb ish\n" + : [data0] "=r"(data[0]), [data1] "=r"(data[1]) + : [dest] "Q"(dest) + : "memory"); + T result; + std::memcpy(&result, data, 16); + return result; +#else u32 tmp; u64 data[2]; __asm__ volatile("1:\n" @@ -1111,6 +1124,7 @@ struct atomic_storage : atomic_storage T result; std::memcpy(&result, data, 16); return result; +#endif } static inline T observe(const T& dest) @@ -1172,13 +1186,38 @@ struct atomic_storage : atomic_storage static inline void store(T& dest, T value) { // TODO +#if defined(ARM_FEATURE_LSE2) + u64 src[2]; + std::memcpy(src, &value, 16); + __asm__ volatile("1:\n" + "dmb ish\n" + "stp %x[data0], %x[data1], %[dest]\n" + "dmb ish\n" + : [dest] "=Q" (dest) + : [data0] "r" (src[0]), [data1] "r" (src[1]) + : "memory" + ); +#else exchange(dest, value); +#endif } static inline void release(T& dest, T value) { +#if defined(ARM_FEATURE_LSE2) + u64 src[2]; + std::memcpy(src, &value, 16); + __asm__ volatile("1:\n" + "dmb ish\n" + "stp %x[data0], %x[data1], %[dest]\n" + : [dest] "=Q" (dest) + : [data0] "r" (src[0]), [data1] "r" 
(src[1]) + : "memory" + ); +#else // TODO exchange(dest, value); +#endif } #endif diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 721c2020e9..32439540b2 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -16,6 +16,13 @@ #define ARCH_X64 1 #elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) #define ARCH_ARM64 1 +// v8.4a+ gives us atomic 16 byte ld/st +// See Arm C Language Extensions Documentation +// Currently there is no feature macro for LSE2 specifically so we define it ourselves +// Unfortunately the __ARM_ARCH integer macro isn't universally defined so we use this hack instead +#if defined(__ARM_ARCH_8_4__) || defined(__ARM_ARCH_8_5__) || defined(__ARM_ARCH_8_6__) || defined(__ARM_ARCH_9__) +#define ARM_FEATURE_LSE2 1 +#endif #endif using std::chrono::steady_clock;