mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-29 09:32:42 +00:00
arm64: add optimized 16byte ld/st for armv8.4a+
16-byte ldp/stp are single-copy atomic on Armv8.4-A and later. See the Arm Architecture Reference Manual, "Changes to single-copy atomicity in Armv8.4". Add atomic load/store/release implementations that use these instructions, and add compile-time detection of v8.4a+ targets.
This commit is contained in:
parent
b13fd68848
commit
9b19f16698
@ -1096,8 +1096,21 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
#endif
|
||||
}
|
||||
#elif defined(ARCH_ARM64)
|
||||
|
||||
static inline T load(const T& dest)
|
||||
{
|
||||
#if defined(ARM_FEATURE_LSE2)
|
||||
u64 data[2];
|
||||
__asm__ volatile("1:\n"
|
||||
"ldp %x[data0], %x[data1], %[dest]\n"
|
||||
"dmb ish\n"
|
||||
: [data0] "=r"(data[0]), [data1] "=r"(data[1])
|
||||
: [dest] "Q"(dest)
|
||||
: "memory");
|
||||
T result;
|
||||
std::memcpy(&result, data, 16);
|
||||
return result;
|
||||
#else
|
||||
u32 tmp;
|
||||
u64 data[2];
|
||||
__asm__ volatile("1:\n"
|
||||
@ -1111,6 +1124,7 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
T result;
|
||||
std::memcpy(&result, data, 16);
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline T observe(const T& dest)
|
||||
@ -1172,13 +1186,38 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
||||
// Writes the 16-byte `value` into `dest`.
// With ARM_FEATURE_LSE2 defined, the write is a single `stp` of two 64-bit
// registers with a `dmb ish` barrier on each side of it; otherwise the work
// is delegated to exchange(dest, value) and nothing is returned from here.
// The LSE2 path relies on 16-byte stp being atomic on Armv8.4-A and later
// (Arm Architecture Reference Manual, "Changes to single-copy atomicity in Armv8.4").
// NOTE(review): presumably `dest` must be 16-byte aligned for that guarantee - confirm.
static inline void store(T& dest, T value)
|
||||
{
|
||||
// TODO
|
||||
#if defined(ARM_FEATURE_LSE2)
|
||||
// Split the value into two 64-bit halves so each can be bound to a general-purpose register.
u64 src[2];
|
||||
std::memcpy(src, &value, 16);
|
||||
// Sequence: barrier, paired 64-bit store to `dest`, barrier.
// The "1:" label is not referenced by any instruction in this asm statement.
// `dest` is an output memory operand ("=Q"); the "memory" clobber is also listed.
__asm__ volatile("1:\n"
|
||||
"dmb ish\n"
|
||||
"stp %x[data0], %x[data1], %[dest]\n"
|
||||
"dmb ish\n"
|
||||
: [dest] "=Q" (dest)
|
||||
: [data0] "r" (src[0]), [data1] "r" (src[1])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
// Fallback when ARM_FEATURE_LSE2 is not defined: reuse exchange() and ignore its result.
exchange(dest, value);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Writes the 16-byte `value` into `dest`.
// With ARM_FEATURE_LSE2 defined, a `dmb ish` barrier is issued and then a
// single `stp` of two 64-bit registers performs the write; no barrier
// follows the store in this function. Otherwise the work is delegated to
// exchange(dest, value) and nothing is returned from here.
// The LSE2 path relies on 16-byte stp being atomic on Armv8.4-A and later
// (Arm Architecture Reference Manual, "Changes to single-copy atomicity in Armv8.4").
// NOTE(review): presumably `dest` must be 16-byte aligned for that guarantee - confirm.
static inline void release(T& dest, T value)
|
||||
{
|
||||
#if defined(ARM_FEATURE_LSE2)
|
||||
// Split the value into two 64-bit halves so each can be bound to a general-purpose register.
u64 src[2];
|
||||
std::memcpy(src, &value, 16);
|
||||
// Sequence: barrier first, then the paired 64-bit store to `dest`.
// The "1:" label is not referenced by any instruction in this asm statement.
// `dest` is an output memory operand ("=Q"); the "memory" clobber is also listed.
__asm__ volatile("1:\n"
|
||||
"dmb ish\n"
|
||||
"stp %x[data0], %x[data1], %[dest]\n"
|
||||
: [dest] "=Q" (dest)
|
||||
: [data0] "r" (src[0]), [data1] "r" (src[1])
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
// TODO
|
||||
// Fallback when ARM_FEATURE_LSE2 is not defined: reuse exchange() and ignore its result.
exchange(dest, value);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -16,6 +16,13 @@
|
||||
#define ARCH_X64 1
|
||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
|
||||
#define ARCH_ARM64 1
|
||||
// v8.4a+ gives us atomic 16 byte ld/st
|
||||
// See Arm C Language Extensions Documentation
|
||||
// Currently there is no feature macro for LSE2 specifically, so we define it ourselves
|
||||
// Unfortunately the __ARM_ARCH integer macro isn't universally defined so we use this hack instead
|
||||
#if defined(__ARM_ARCH_8_4__) || defined(__ARM_ARCH_8_5__) || defined(__ARM_ARCH_8_6__) || defined(__ARM_ARCH_9__)
|
||||
#define ARM_FEATURE_LSE2 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using std::chrono::steady_clock;
|
||||
|
Loading…
x
Reference in New Issue
Block a user