diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp index 005f73daa4..82559ede48 100644 --- a/rpcs3/util/atomic.hpp +++ b/rpcs3/util/atomic.hpp @@ -1096,8 +1096,21 @@ struct atomic_storage : atomic_storage #endif } #elif defined(ARCH_ARM64) + static inline T load(const T& dest) { +#if defined(ARM_FEATURE_LSE2) /* LSE2 build: read both 64-bit halves of dest with a single ldp, then issue a dmb ish barrier */ + u64 data[2]; + __asm__ volatile("1:\n" /* NOTE(review): this asm body contains no branch, so the 1: label is unreferenced here */ + "ldp %x[data0], %x[data1], %[dest]\n" + "dmb ish\n" + : [data0] "=r"(data[0]), [data1] "=r"(data[1]) + : [dest] "Q"(dest) + : "memory"); + T result; + std::memcpy(&result, data, 16); /* reassemble the two 64-bit halves into a T */ + return result; +#else u32 tmp; u64 data[2]; __asm__ volatile("1:\n" @@ -1111,6 +1124,7 @@ struct atomic_storage : atomic_storage T result; std::memcpy(&result, data, 16); return result; +#endif } static inline T observe(const T& dest) @@ -1172,13 +1186,38 @@ struct atomic_storage : atomic_storage static inline void store(T& dest, T value) { // TODO +#if defined(ARM_FEATURE_LSE2) /* LSE2 build: one stp of both halves with a dmb ish before and after; other builds fall back to exchange() */ + u64 src[2]; + std::memcpy(src, &value, 16); /* split the 16-byte value into two 64-bit register operands */ + __asm__ volatile("1:\n" /* NOTE(review): no branch targets the 1: label in this variant */ + "dmb ish\n" + "stp %x[data0], %x[data1], %[dest]\n" + "dmb ish\n" + : [dest] "=Q" (dest) + : [data0] "r" (src[0]), [data1] "r" (src[1]) + : "memory" + ); +#else exchange(dest, value); +#endif } static inline void release(T& dest, T value) { +#if defined(ARM_FEATURE_LSE2) /* LSE2 build: dmb ish before the stp only, with no trailing barrier (unlike store()); other builds fall back to exchange() */ + u64 src[2]; + std::memcpy(src, &value, 16); /* split the 16-byte value into two 64-bit register operands */ + __asm__ volatile("1:\n" /* NOTE(review): no branch targets the 1: label in this variant */ + "dmb ish\n" + "stp %x[data0], %x[data1], %[dest]\n" + : [dest] "=Q" (dest) + : [data0] "r" (src[0]), [data1] "r" (src[1]) + : "memory" + ); +#else // TODO exchange(dest, value); +#endif } #endif diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 721c2020e9..32439540b2 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -16,6 +16,13 @@ #define ARCH_X64 1 #elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) #define ARCH_ARM64 1 +// v8.4a+ gives us atomic 16 byte ld/st +// See Arm C Language Extensions Documentation +// Currently there is no feature macro for LSE2 specifically so we define it ourselves +// 
Unfortunately the __ARM_ARCH integer macro isn't universally defined so we use this hack instead +#if defined(__ARM_ARCH_8_4__) || defined(__ARM_ARCH_8_5__) || defined(__ARM_ARCH_8_6__) || defined(__ARM_ARCH_9__) +#define ARM_FEATURE_LSE2 1 +#endif #endif using std::chrono::steady_clock;