mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 03:32:55 +00:00
arm64: add optimized 16byte ld/st for armv8.4a+
16-byte ldp/stp are single-copy atomic on Armv8.4-A and later. See the Arm Architecture Reference Manual, "Changes to single-copy atomicity in Armv8.4". Add atomic load/store/release implementations that use these instructions, and add compile-time detection of the v8.4a+ capability.
This commit is contained in:
parent
b13fd68848
commit
9b19f16698
@ -1096,8 +1096,21 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#elif defined(ARCH_ARM64)
|
#elif defined(ARCH_ARM64)
|
||||||
|
|
||||||
static inline T load(const T& dest)
|
static inline T load(const T& dest)
|
||||||
{
|
{
|
||||||
|
#if defined(ARM_FEATURE_LSE2)
|
||||||
|
u64 data[2];
|
||||||
|
__asm__ volatile("1:\n"
|
||||||
|
"ldp %x[data0], %x[data1], %[dest]\n"
|
||||||
|
"dmb ish\n"
|
||||||
|
: [data0] "=r"(data[0]), [data1] "=r"(data[1])
|
||||||
|
: [dest] "Q"(dest)
|
||||||
|
: "memory");
|
||||||
|
T result;
|
||||||
|
std::memcpy(&result, data, 16);
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
u32 tmp;
|
u32 tmp;
|
||||||
u64 data[2];
|
u64 data[2];
|
||||||
__asm__ volatile("1:\n"
|
__asm__ volatile("1:\n"
|
||||||
@ -1111,6 +1124,7 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
|||||||
T result;
|
T result;
|
||||||
std::memcpy(&result, data, 16);
|
std::memcpy(&result, data, 16);
|
||||||
return result;
|
return result;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline T observe(const T& dest)
|
static inline T observe(const T& dest)
|
||||||
@ -1172,13 +1186,38 @@ struct atomic_storage<T, 16> : atomic_storage<T, 0>
|
|||||||
static inline void store(T& dest, T value)
|
static inline void store(T& dest, T value)
|
||||||
{
|
{
|
||||||
// TODO
|
// TODO
|
||||||
|
#if defined(ARM_FEATURE_LSE2)
|
||||||
|
u64 src[2];
|
||||||
|
std::memcpy(src, &value, 16);
|
||||||
|
__asm__ volatile("1:\n"
|
||||||
|
"dmb ish\n"
|
||||||
|
"stp %x[data0], %x[data1], %[dest]\n"
|
||||||
|
"dmb ish\n"
|
||||||
|
: [dest] "=Q" (dest)
|
||||||
|
: [data0] "r" (src[0]), [data1] "r" (src[1])
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
#else
|
||||||
exchange(dest, value);
|
exchange(dest, value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void release(T& dest, T value)
|
static inline void release(T& dest, T value)
|
||||||
{
|
{
|
||||||
|
#if defined(ARM_FEATURE_LSE2)
|
||||||
|
u64 src[2];
|
||||||
|
std::memcpy(src, &value, 16);
|
||||||
|
__asm__ volatile("1:\n"
|
||||||
|
"dmb ish\n"
|
||||||
|
"stp %x[data0], %x[data1], %[dest]\n"
|
||||||
|
: [dest] "=Q" (dest)
|
||||||
|
: [data0] "r" (src[0]), [data1] "r" (src[1])
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
#else
|
||||||
// TODO
|
// TODO
|
||||||
exchange(dest, value);
|
exchange(dest, value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -16,6 +16,13 @@
|
|||||||
#define ARCH_X64 1
|
#define ARCH_X64 1
|
||||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
|
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
|
||||||
#define ARCH_ARM64 1
|
#define ARCH_ARM64 1
|
||||||
|
// v8.4a+ gives us atomic 16 byte ld/st
|
||||||
|
// See Arm C Language Extensions Documentation
|
||||||
|
// Currently there is no feature macro for LSE2 specifically, so we define it ourselves
|
||||||
|
// Unfortunately the __ARM_ARCH integer macro isn't universally defined, so we check the per-version architecture macros instead
|
||||||
|
#if defined(__ARM_ARCH_8_4__) || defined(__ARM_ARCH_8_5__) || defined(__ARM_ARCH_8_6__) || defined(__ARM_ARCH_9__)
|
||||||
|
#define ARM_FEATURE_LSE2 1
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using std::chrono::steady_clock;
|
using std::chrono::steady_clock;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user