From f0fd7e2e19975fe442455080475e74ec43a3f520 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Tue, 20 Oct 2020 22:00:15 +0300
Subject: [PATCH] Atomics: use WaitOnAddress if available (Win8+)

---
Review notes (below the cut line, so not part of the commit message):
 * wait(): the WaitOnAddress timeout selection was inverted relative to the
   keyed-event branch (finite timeouts waited forever); it now yields INFINITE
   only when timeout + 1 == 0. The clamp bound is computed in 64 bits, since
   (UINT32_MAX - 1) * 1000'000 wraps in 32-bit unsigned arithmetic.
 * `atomic_t sema;` and `static_cast(timeout / 100)` look like their template
   arguments were lost in extraction; they are kept exactly as found.
   TODO: restore them from the original tree.
 * Line breaks and indentation were reconstructed from the hunk counts;
   verify whitespace before applying.

 Utilities/sync.h            |  8 ++++
 rpcs3/Emu/CPU/CPUThread.cpp |  4 ++
 rpcs3/util/atomic.cpp       | 90 ++++++++++++++++++++++++++++++-------
 3 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/Utilities/sync.h b/Utilities/sync.h
index 60ac01d9a1..c002bfd98e 100644
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@@ -28,6 +28,14 @@ DYNAMIC_IMPORT("ntdll.dll", NtWaitForKeyedEvent, NTSTATUS(HANDLE, PVOID Key, BOO
 DYNAMIC_IMPORT("ntdll.dll", NtReleaseKeyedEvent, NTSTATUS(HANDLE, PVOID Key, BOOLEAN Alertable, PLARGE_INTEGER Timeout));
 DYNAMIC_IMPORT("ntdll.dll", NtWaitForSingleObject, NTSTATUS(HANDLE Handle, BOOLEAN Alertable, PLARGE_INTEGER Timeout));
 DYNAMIC_IMPORT("ntdll.dll", NtDelayExecution, NTSTATUS(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval));
+
+namespace wutex
+{
+	DYNAMIC_IMPORT("API-MS-Win-Core-Synch-l1-2-0.dll", WaitOnAddress, BOOL(volatile VOID* Address, PVOID CompareAddress, SIZE_T AddressSize, DWORD dwMilliseconds));
+	DYNAMIC_IMPORT("API-MS-Win-Core-Synch-l1-2-0.dll", WakeByAddressSingle, BOOL(PVOID Address));
+	DYNAMIC_IMPORT("API-MS-Win-Core-Synch-l1-2-0.dll", WakeByAddressAll, BOOL(PVOID Address));
+}
+
 #endif
 
 #ifndef __linux__
diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 57b8c663c2..a32619da4c 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -818,11 +818,15 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this, bool cancel_if_not_suspen
 			// Check only CPUs which haven't acknowledged their waiting state yet
 			for_all_cpu([&](cpu_thread* cpu, u64 index)
 			{
+				verify(HERE), cpu->state & cpu_flag::pause;
+
 				if (cpu->state & cpu_flag::wait)
 				{
 					ctr->cpu_copy_bits[index / 64] &= ~(1ull << (index % 64));
 				}
 			});
+
+			_mm_pause();
 		}
 
 		// Extract queue and reverse element order (FILO to FIFO) (TODO: maybe leave order as is?)
diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp
index b306831fb4..fc23ffd66f 100644
--- a/rpcs3/util/atomic.cpp
+++ b/rpcs3/util/atomic.cpp
@@ -214,7 +214,13 @@ namespace
 	};
 }
 #elif defined(_WIN32)
-using sema_handle = std::uint16_t;
+namespace
+{
+	struct alignas(64) sema_handle
+	{
+		atomic_t sema;
+	};
+}
 #else
 namespace
 {
@@ -560,18 +566,42 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
 		}
 	}
 #elif defined(_WIN32) && !defined(USE_POSIX)
-	LARGE_INTEGER qw;
-	qw.QuadPart = -static_cast(timeout / 100);
-
-	if (timeout % 100)
+	if (wutex::WaitOnAddress)
 	{
-		// Round up to closest 100ns unit
-		qw.QuadPart -= 1;
+		if (s_sema_list[sema_id].sema.try_dec(0))
+		{
+			fallback = true;
+		}
+		else
+		{
+			// timeout + 1 == 0 means "no timeout" (the keyed-event branch passes a null timeout then); otherwise ns -> ms, clamped below INFINITE using a 64-bit bound
+			const DWORD time_ms = timeout + 1 ? (timeout > (UINT32_MAX - 1) * 1000'000ull ? (UINT32_MAX - 1) : timeout / 1000'000) : INFINITE;
+
+			sema_handle _cmp{};
+
+			wutex::WaitOnAddress(&s_sema_list[sema_id].sema, &_cmp.sema, sizeof(_cmp.sema), time_ms);
+
+			if (s_sema_list[sema_id].sema.try_dec(0))
+			{
+				fallback = true;
+			}
+		}
 	}
-
-	if (!NtWaitForKeyedEvent(nullptr, &s_sema_list[sema_id], false, timeout + 1 ? &qw : nullptr))
+	else
 	{
-		fallback = true;
+		LARGE_INTEGER qw;
+		qw.QuadPart = -static_cast(timeout / 100);
+
+		if (timeout % 100)
+		{
+			// Round up to closest 100ns unit
+			qw.QuadPart -= 1;
+		}
+
+		if (!NtWaitForKeyedEvent(nullptr, &s_sema_list[sema_id], false, timeout + 1 ? &qw : nullptr))
+		{
+			fallback = true;
+		}
 	}
 #elif defined(USE_POSIX)
 	struct timespec ts;
@@ -671,11 +701,21 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
 			fallback = true;
 		}
 #elif defined(_WIN32) && !defined(USE_POSIX)
-		static LARGE_INTEGER instant{};
-
-		if (!NtWaitForKeyedEvent(nullptr, &s_sema_list[sema_id], false, &instant))
+		if (wutex::WaitOnAddress)
 		{
-			fallback = true;
+			if (s_sema_list[sema_id].sema.try_dec(0))
+			{
+				fallback = true;
+			}
+		}
+		else
+		{
+			static LARGE_INTEGER instant{};
+
+			if (!NtWaitForKeyedEvent(nullptr, &s_sema_list[sema_id], false, &instant))
+			{
+				fallback = true;
+			}
 		}
 #elif defined(USE_POSIX)
 		if (sem_trywait(&s_sema_list[sema_id]) == 0)
@@ -772,7 +812,15 @@ void atomic_storage_futex::notify_one(const void* data)
 		s_sema_list[sema_id].sema++;
 		futex(&s_sema_list[sema_id].sema, FUTEX_WAKE_PRIVATE, 1);
 #elif defined(_WIN32)
-		NtReleaseKeyedEvent(nullptr, &s_sema_list[sema_id], 1, nullptr);
+		if (wutex::WaitOnAddress)
+		{
+			s_sema_list[sema_id].sema++;
+			wutex::WakeByAddressSingle(&s_sema_list[sema_id].sema);
+		}
+		else
+		{
+			NtReleaseKeyedEvent(nullptr, &s_sema_list[sema_id], 1, nullptr);
+		}
 #else
 		dumb_sema& sema = *s_sema_list[sema_id];
 
@@ -830,9 +878,17 @@ void atomic_storage_futex::notify_all(const void* data)
 		s_sema_list[sema_id].sema += count;
 		futex(&s_sema_list[sema_id].sema, FUTEX_WAKE_PRIVATE, 0x7fff'ffff);
 #elif defined(_WIN32)
-		for (u32 i = 0; i < count; i++)
+		if (wutex::WaitOnAddress)
 		{
-			NtReleaseKeyedEvent(nullptr, &s_sema_list[sema_id], count, nullptr);
+			s_sema_list[sema_id].sema += count;
+			wutex::WakeByAddressAll(&s_sema_list[sema_id].sema);
+		}
+		else
+		{
+			for (u32 i = 0; i < count; i++)
+			{
+				NtReleaseKeyedEvent(nullptr, &s_sema_list[sema_id], count, nullptr);
+			}
 		}
 #else
 		if (count)