Rewrite condition variables

Implement helper functions balanced_wait_until and balanced_awaken
They include a new path for Windows 8.1+ (WaitOnAddress)

shared_mutex, cond_variable, cond_one, cond_x16 modified to use them
Added helper function utils::popcnt16
Replace most semaphore<> with shared_mutex
This commit is contained in:
Nekotekina 2018-11-26 18:55:22 +03:00
parent f442a8a84c
commit 96cabeadff
23 changed files with 269 additions and 338 deletions

View File

@ -75,7 +75,7 @@ namespace logs
#endif
uchar* m_fptr{};
z_stream m_zs{};
semaphore<> m_m;
shared_mutex m_m;
alignas(128) atomic_t<u64> m_buf{0}; // MSB (40 bit): push begin, LSB (24 bits): push size
alignas(128) atomic_t<u64> m_out{0}; // Amount of bytes written to file
@ -183,7 +183,7 @@ namespace logs
channel SPU("SPU");
// Channel registry mutex
semaphore<> g_mutex;
shared_mutex g_mutex;
// Must be set to true in main()
atomic_t<bool> g_init{false};

View File

@ -44,6 +44,20 @@ namespace utils
#endif
}
// Count the number of set bits in a 16-bit value.
// Works by summing adjacent bit fields in parallel: 1-bit fields into 2-bit sums,
// 2-bit sums into 4-bit sums, 4-bit sums into two per-byte sums, which are then added.
inline u8 popcnt16(u16 arg)
{
	u32 bits = arg;

	// Each 2-bit field now holds the count of its two source bits (0..2)
	bits = (bits & 0x5555) + ((bits >> 1) & 0x5555);

	// Each 4-bit field now holds the count of its four source bits (0..4)
	bits = (bits & 0x3333) + ((bits >> 2) & 0x3333);

	// Each byte now holds the count of its eight source bits (0..8)
	bits = (bits & 0x0f0f) + ((bits >> 4) & 0x0f0f);

	// Add the two per-byte counts (result is in range 0..16)
	return static_cast<u8>((bits & 0xff) + (bits >> 8));
}
// Rotate helpers
#if defined(__GNUG__)

View File

@ -9,110 +9,47 @@
bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
{
verify(HERE), _old != -1; // Very unlikely: it requires 2^32 distinct threads to wait simultaneously
const bool is_inf = _timeout > max_timeout;
verify("cond_variable overflow" HERE), (_old & 0xffff) == 0; // Very unlikely: it requires 65535 distinct threads to wait simultaneously
return balanced_wait_until(m_value, _timeout, [&](u32& value, auto... ret) -> int
{
if (value >> 16)
{
// Success
value -= 0x10001;
return +1;
}
if constexpr (sizeof...(ret))
{
// Retire
value -= 1;
return -1;
}
return 0;
});
#ifdef _WIN32
LARGE_INTEGER timeout;
timeout.QuadPart = _timeout * -10;
if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_value, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
if (_old >= 0x10000 && !OptWaitOnAddress && m_value)
{
verify(HERE), rc == WAIT_TIMEOUT;
// Retire
while (!m_value.try_dec())
{
timeout.QuadPart = 0;
if (HRESULT rc2 = NtWaitForKeyedEvent(nullptr, &m_value, false, &timeout))
{
verify(HERE), rc2 == WAIT_TIMEOUT;
SwitchToThread();
continue;
}
return true;
}
return false;
}
return true;
#else
timespec timeout;
timeout.tv_sec = _timeout / 1000000;
timeout.tv_nsec = (_timeout % 1000000) * 1000;
for (u32 value = _old + 1;; value = m_value)
{
const int err = futex(&m_value, FUTEX_WAIT_PRIVATE, value, is_inf ? nullptr : &timeout) == 0
? 0
: errno;
// Normal or timeout wakeup
if (!err || (!is_inf && err == ETIMEDOUT))
{
// Cleanup (remove waiter)
verify(HERE), m_value--;
return !err;
}
// Not a wakeup
verify(HERE), err == EAGAIN;
// Workaround possibly stolen signal
imp_wake(1);
}
#endif
}
void cond_variable::imp_wake(u32 _count) noexcept
{
#ifdef _WIN32
// Try to subtract required amount of waiters
const u32 count = m_value.atomic_op([=](u32& value)
balanced_awaken(m_value, m_value.atomic_op([&](u32& value) -> u32
{
if (value > _count)
{
value -= _count;
return _count;
}
// Subtract already signaled number from total amount of waiters
const u32 can_sig = (value & 0xffff) - (value >> 16);
const u32 num_sig = std::min<u32>(can_sig, _count);
return std::exchange(value, 0);
});
for (u32 i = count; i > 0; i--)
{
NtReleaseKeyedEvent(nullptr, &m_value, false, nullptr);
}
#else
for (u32 i = _count; i > 0; std::this_thread::yield())
{
const u32 value = m_value;
// Constrain remaining amount with imaginary waiter count
if (i > value)
{
i = value;
}
if (!value || i == 0)
{
// Nothing to do
return;
}
if (const int res = futex(&m_value, FUTEX_WAKE_PRIVATE, i > INT_MAX ? INT_MAX : i))
{
verify(HERE), res >= 0 && (u32)res <= i;
i -= res;
}
if (!m_value || i == 0)
{
// Escape
return;
}
}
#endif
value += num_sig << 16;
return num_sig;
}));
}
bool notifier::imp_try_lock(u32 count)
@ -209,62 +146,29 @@ bool notifier::wait(u64 usec_timeout)
return res;
}
bool cond_one::imp_wait(u32 _old, u64 _timeout) noexcept
bool cond_one::imp_wait(u64 _timeout) noexcept
{
verify(HERE), _old == c_lock;
// State transition: c_sig -> c_lock \ c_lock -> c_wait
const u32 _old = m_value.fetch_sub(1);
if (LIKELY(_old == c_sig))
return true;
const bool is_inf = _timeout > cond_variable::max_timeout;
#ifdef _WIN32
LARGE_INTEGER timeout;
timeout.QuadPart = _timeout * -10;
if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_value, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
return balanced_wait_until(m_value, _timeout, [&](u32& value, auto... ret) -> int
{
verify(HERE), rc == WAIT_TIMEOUT;
// Retire
const bool signaled = m_value.exchange(c_lock) == c_sig;
while (signaled)
if (value == c_sig)
{
timeout.QuadPart = 0;
if (HRESULT rc2 = NtWaitForKeyedEvent(nullptr, &m_value, false, &timeout))
{
verify(HERE), rc2 == WAIT_TIMEOUT;
SwitchToThread();
continue;
}
return true;
value = c_lock;
return +1;
}
return false;
}
#else
timespec timeout;
timeout.tv_sec = _timeout / 1000000;
timeout.tv_nsec = (_timeout % 1000000) * 1000;
for (u32 value = _old - 1; value != c_sig; value = m_value)
{
const int err = futex(&m_value, FUTEX_WAIT_PRIVATE, value, is_inf ? nullptr : &timeout) == 0
? 0
: errno;
// Normal or timeout wakeup
if (!err || (!is_inf && err == ETIMEDOUT))
if constexpr (sizeof...(ret))
{
return m_value.exchange(c_lock) == c_sig;
value = c_lock;
return -1;
}
// Not a wakeup
verify(HERE), err == EAGAIN;
}
#endif
verify(HERE), m_value.exchange(c_lock) == c_sig;
return true;
return 0;
});
}
void cond_one::imp_notify() noexcept
@ -287,79 +191,54 @@ void cond_one::imp_notify() noexcept
return;
}
#ifdef _WIN32
NtReleaseKeyedEvent(nullptr, &m_value, false, nullptr);
#else
futex(&m_value, FUTEX_WAKE_PRIVATE, 1);
#endif
balanced_awaken(m_value, 1);
}
bool cond_x16::imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept
bool cond_x16::imp_wait(u32 slot, u64 _timeout) noexcept
{
const u32 wait_bit = c_wait << slot;
const u32 lock_bit = c_lock << slot;
const bool is_inf = _timeout > cond_variable::max_timeout;
#ifdef _WIN32
LARGE_INTEGER timeout;
timeout.QuadPart = _timeout * -10;
if (HRESULT rc = _timeout ? NtWaitForKeyedEvent(nullptr, &m_cvx16, false, is_inf ? nullptr : &timeout) : WAIT_TIMEOUT)
// Change state from c_lock to c_wait
const u32 old_ = m_cvx16.fetch_op([=](u32& cvx16)
{
verify(HERE), rc == WAIT_TIMEOUT;
// Retire
const bool signaled = this->retire(slot);
while (signaled)
if (cvx16 & wait_bit)
{
timeout.QuadPart = 0;
// c_sig -> c_lock
cvx16 &= ~wait_bit;
}
else
{
cvx16 |= wait_bit;
cvx16 &= ~lock_bit;
}
});
if (HRESULT rc2 = NtWaitForKeyedEvent(nullptr, &m_cvx16, false, &timeout))
{
verify(HERE), rc2 == WAIT_TIMEOUT;
SwitchToThread();
continue;
}
if (old_ & wait_bit)
{
// Already signaled, return without waiting
return true;
}
return true;
return balanced_wait_until(m_cvx16, _timeout, [&](u32& cvx16, auto... ret) -> int
{
if (cvx16 & lock_bit)
{
// c_sig -> c_lock
cvx16 &= ~wait_bit;
return +1;
}
return false;
}
if (!this->retire(slot))
{
// Stolen notification: restore balance
NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
}
#else
timespec timeout;
timeout.tv_sec = _timeout / 1000000;
timeout.tv_nsec = (_timeout % 1000000) * 1000;
for (u32 value = _new; ((value >> slot) & c_sig) != c_sig; value = m_cvx16)
{
const int err = futex(&m_cvx16, FUTEX_WAIT_PRIVATE, value, is_inf ? nullptr : &timeout) == 0
? 0
: errno;
// Normal or timeout wakeup
if (!err || (!is_inf && err == ETIMEDOUT))
if constexpr (sizeof...(ret))
{
return this->retire(slot);
// Retire
cvx16 |= lock_bit;
cvx16 &= ~wait_bit;
return -1;
}
// Not a wakeup
verify(HERE), err == EAGAIN;
}
// Convert c_sig to c_lock
m_cvx16 &= ~wait_bit;
#endif
return true;
return 0;
});
}
void cond_x16::imp_notify() noexcept
@ -386,13 +265,5 @@ void cond_x16::imp_notify() noexcept
return;
}
#ifdef _WIN32
for (u32 i = 0; i < 16; i++)
{
if ((wait_mask >> i) & 1)
NtReleaseKeyedEvent(nullptr, &m_cvx16, false, nullptr);
}
#else
futex(&m_cvx16, FUTEX_WAKE_PRIVATE, INT_MAX);
#endif
balanced_awaken(m_cvx16, utils::popcnt16(wait_mask));
}

View File

@ -57,7 +57,7 @@ public:
{
if (m_value)
{
imp_wake(-1);
imp_wake(65535);
}
}
@ -140,7 +140,7 @@ class cond_one
atomic_t<u32> m_value{0};
bool imp_wait(u32 _old, u64 _timeout) noexcept;
bool imp_wait(u64 _timeout) noexcept;
void imp_notify() noexcept;
public:
@ -162,13 +162,7 @@ public:
{
AUDIT(lock.owns_lock());
AUDIT(lock.mutex() == this);
// State transition: c_sig -> c_lock, c_lock -> c_wait
const u32 _old = m_value.fetch_sub(1);
if (LIKELY(_old == c_sig))
return true;
return imp_wait(_old, usec_timeout);
return imp_wait(usec_timeout);
}
void notify() noexcept
@ -244,28 +238,9 @@ class cond_x16
}
};
bool imp_wait(u32 _new, u32 slot, u64 _timeout) noexcept;
bool imp_wait(u32 slot, u64 _timeout) noexcept;
void imp_notify() noexcept;
bool retire(u32 slot) noexcept
{
const u32 wait_bit = c_wait << slot;
const u32 lock_bit = c_lock << slot;
return m_cvx16.atomic_op([=](u32& cvx16)
{
if (cvx16 & lock_bit)
{
cvx16 &= ~wait_bit;
return true;
}
cvx16 |= lock_bit;
cvx16 &= ~wait_bit;
return false;
});
}
public:
constexpr cond_x16() = default;
@ -277,33 +252,7 @@ public:
bool wait(lock_x16 const& lock, u64 usec_timeout = -1) noexcept
{
AUDIT(lock.m_this == this);
const u32 wait_bit = c_wait << lock.m_slot;
const u32 lock_bit = c_lock << lock.m_slot;
// Change state from c_lock to c_wait
const u32 new_ = m_cvx16.atomic_op([=](u32& cvx16)
{
if (cvx16 & wait_bit)
{
cvx16 &= ~wait_bit;
}
else
{
cvx16 |= wait_bit;
cvx16 &= ~lock_bit;
}
return cvx16;
});
if (new_ & lock_bit)
{
// Already signaled, return without waiting
return true;
}
return imp_wait(new_, lock.m_slot, usec_timeout);
return imp_wait(lock.m_slot, usec_timeout);
}
void notify_all() noexcept

View File

@ -44,42 +44,24 @@ void shared_mutex::imp_unlock_shared(u32 old)
void shared_mutex::imp_wait()
{
#ifdef _WIN32
NtWaitForKeyedEvent(nullptr, &m_value, false, nullptr);
#else
while (true)
while (!balanced_wait_until(m_value, -1, [&](u32& value, auto...)
{
// Load new value, try to acquire c_sig
auto [value, ok] = m_value.fetch_op([](u32& value)
if (value >= c_sig)
{
if (value >= c_sig)
{
value -= c_sig;
return true;
}
return false;
});
if (ok)
{
return;
value -= c_sig;
return true;
}
futex(&m_value, FUTEX_WAIT_BITSET_PRIVATE, value, nullptr, c_sig);
return false;
}))
{
}
#endif
}
void shared_mutex::imp_signal()
{
#ifdef _WIN32
NtReleaseKeyedEvent(nullptr, &m_value, false, nullptr);
#else
m_value += c_sig;
futex(&m_value, FUTEX_WAKE_BITSET_PRIVATE, 1, nullptr, c_sig);
//futex(&m_value, FUTEX_WAKE_BITSET_PRIVATE, c_one, nullptr, c_sig - 1);
#endif
balanced_awaken(m_value, 1);
}
void shared_mutex::imp_lock(u32 val)
@ -166,29 +148,6 @@ void shared_mutex::imp_lock_unlock()
busy_wait(1500);
}
#ifndef _WIN32
while (false)
{
const u32 val = m_value;
if (val % c_one == 0 && (val / c_one < _max || val >= c_sig))
{
return;
}
if (val <= c_one)
{
// Can't expect a signal
break;
}
_max = val / c_one;
// Monitor all bits except c_sig
futex(&m_value, FUTEX_WAIT_BITSET_PRIVATE, val, nullptr, c_sig - 1);
}
#endif
// Lock and unlock
if (!m_value.fetch_add(c_one))
{

View File

@ -72,12 +72,12 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
switch (futex_op)
{
case FUTEX_WAIT:
case FUTEX_WAIT_PRIVATE:
{
mask = -1;
[[fallthrough]];
}
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_BITSET_PRIVATE:
{
if (*reinterpret_cast<volatile uint*>(uaddr) != val)
{
@ -115,12 +115,12 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
return res;
}
case FUTEX_WAKE:
case FUTEX_WAKE_PRIVATE:
{
mask = -1;
[[fallthrough]];
}
case FUTEX_WAKE_BITSET:
case FUTEX_WAKE_BITSET_PRIVATE:
{
int res = 0;
@ -149,3 +149,144 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
return g_futex(uaddr, futex_op, val, timeout, mask);
#endif
}
// Block on a 32-bit atomic variable until the predicate reports success or the timeout expires.
//
// var          - atomic variable to wait on (sizeof(T) must be 4)
// usec_timeout - timeout in microseconds; any value above (UINT32_MAX / 1000) * 1000000
//                is treated as an infinite wait
// pred         - callable invoked as pred(T& value) for a normal attempt and as
//                pred(T& value, nullptr) for a "retire" attempt (waiter gives up).
//                Its result is interpreted as:
//                  > 0 : success, the (possibly modified) value is stored into var
//                    0 : failure, var is left untouched
//                  < 0 : failure, but the modified value is stored into var
//
// Returns true if a predicate invocation reported success, false otherwise.
template <typename T, typename Pred>
bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
{
	static_assert(sizeof(T) == 4);

	const bool is_inf = usec_timeout > u64{UINT32_MAX / 1000} * 1000000;

	// Optional second argument indicates that the predicate should try to retire
	auto test_pred = [&](T& _new, auto... args)
	{
		T old = var.load();

		while (true)
		{
			_new = old;

			// Zero indicates failure without modifying the value
			// Negative indicates failure but modifies the value
			auto ret = std::invoke(std::forward<Pred>(pred), _new, args...);

			// Finished if there is nothing to store or the store went through; otherwise try again
			// NOTE(review): presumably compare_exchange reloads `old` on failure - confirm against atomic_t
			if (LIKELY(!ret || var.compare_exchange(old, _new)))
			{
				return ret > 0;
			}
		}
	};

	// Last value observed by test_pred; used as the comparand for the address-based waits below
	T value;

#ifdef _WIN32
	if (OptWaitOnAddress)
	{
		while (!test_pred(value))
		{
			// Timeout is passed in milliseconds here
			if (OptWaitOnAddress(&var, &value, sizeof(u32), is_inf ? INFINITE : usec_timeout / 1000))
			{
				// Woken up: try to succeed normally, otherwise retire
				if (!test_pred(value) && !test_pred(value, nullptr))
				{
					return false;
				}

				break;
			}

			if (GetLastError() == ERROR_TIMEOUT)
			{
				// Retire
				return test_pred(value, nullptr);
			}
		}

		return true;
	}

	// Fallback path (OptWaitOnAddress is not set): keyed events
	LARGE_INTEGER timeout;
	timeout.QuadPart = usec_timeout * -10;

	if (!usec_timeout || NtWaitForKeyedEvent(nullptr, &var, false, is_inf ? nullptr : &timeout))
	{
		// Timed out: retire
		if (!test_pred(value, nullptr))
		{
			return false;
		}

		// Signaled in the last moment: restore balance
		NtWaitForKeyedEvent(nullptr, &var, false, nullptr);
		return true;
	}

	if (!test_pred(value) && !test_pred(value, nullptr))
	{
		// Stolen notification: restore balance
		NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
		return false;
	}

	return true;
#else
	struct timespec timeout;
	timeout.tv_sec  = usec_timeout / 1000000;
	timeout.tv_nsec = (usec_timeout % 1000000) * 1000;

	while (!test_pred(value))
	{
		if (futex(&var, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
		{
			// Woken up: try to succeed normally, otherwise retire
			if (!test_pred(value) && !test_pred(value, nullptr))
			{
				return false;
			}

			break;
		}

		switch (errno)
		{
		case EAGAIN: break; // Not a wakeup: re-test the predicate and wait again
		case EINTR: break; // Interrupted by a signal: not a wakeup either, so retry instead of failing verification
		case ETIMEDOUT: return test_pred(value, nullptr); // Retire
		default: verify("Unknown futex error" HERE), 0;
		}
	}

	return true;
#endif
}
// Wake up threads waiting on a 32-bit atomic variable.
//
// var    - atomic variable the waiters are blocked on (sizeof(T) must be 4)
// weight - requested amount of wakeups; zero does nothing
template <typename T>
void balanced_awaken(atomic_t<T>& var, u32 weight)
{
	static_assert(sizeof(T) == 4);

#ifdef _WIN32
	if (!OptWaitOnAddress)
	{
		// Keyed events path: one release call per requested wakeup
		for (u32 left = weight; left != 0; left--)
		{
			NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
		}

		return;
	}

	// Address-based path: a single waiter for weight 1, every waiter for anything larger
	if (weight == 1)
	{
		OptWakeByAddressSingle(&var);
	}
	else if (weight != 0)
	{
		OptWakeByAddressAll(&var);
	}
#else
	if (weight == 0)
	{
		return;
	}

	// The wake count is clamped to INT_MAX
	futex(&var, FUTEX_WAKE_PRIVATE, std::min<u32>(INT_MAX, weight));
#endif
}

View File

@ -2,7 +2,6 @@
#include "Utilities/Timer.h"
#include "Emu/Cell/lv2/sys_memory.h"
#include "Utilities/sema.h"
#include "Utilities/Thread.h"
#include <map>

View File

@ -5,8 +5,6 @@
#include "Emu/Io/MouseHandler.h"
#include "Utilities/sema.h"
#include "cellMouse.h"
extern logs::channel sys_io;

View File

@ -1383,7 +1383,7 @@ extern void ppu_initialize(const ppu_module& info)
std::shared_ptr<jit_compiler> jit;
// Compiler mutex (global)
static semaphore<> jmutex;
static shared_mutex jmutex;
// Initialize global semaphore with the max number of threads
u32 max_threads = static_cast<u32>(g_cfg.core.llvm_threads);

View File

@ -83,7 +83,7 @@ struct lv2_event_queue final : public lv2_obj
const u64 key;
const s32 size;
semaphore<> mutex;
shared_mutex mutex;
std::deque<lv2_event> events;
std::deque<cpu_thread*> sq;

View File

@ -40,7 +40,7 @@ struct lv2_event_flag final : lv2_obj
const s32 type;
const u64 name;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<u32> waiters{0};
atomic_t<u64> pattern;
std::deque<cpu_thread*> sq;

View File

@ -27,7 +27,7 @@ struct lv2_lwcond final : lv2_obj
const u32 lwid;
vm::ptr<sys_lwcond_t> control;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<u32> waiters{0};
std::deque<cpu_thread*> sq;

View File

@ -57,7 +57,7 @@ struct lv2_lwmutex final : lv2_obj
const vm::ptr<sys_lwmutex_t> control;
const u64 name;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<u32> signaled{0};
std::deque<cpu_thread*> sq;

View File

@ -56,7 +56,7 @@ struct page_fault_event
struct page_fault_event_entries
{
std::vector<page_fault_event> events;
semaphore<> pf_mutex;
shared_mutex pf_mutex;
};
// SysCalls

View File

@ -31,7 +31,7 @@ struct lv2_mutex final : lv2_obj
const u64 name;
const s32 flags;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<u32> owner{0}; // Owner Thread ID
atomic_t<u32> lock_count{0}; // Recursive Locks
atomic_t<u32> cond_count{0}; // Condition Variables

View File

@ -30,7 +30,7 @@ LOG_CHANNEL(sys_net);
static std::vector<ppu_thread*> s_to_awake;
static semaphore<> s_nw_mutex;
static shared_mutex s_nw_mutex;
extern u64 get_system_time();

View File

@ -1,7 +1,7 @@
#pragma once
#include "Utilities/bit_set.h"
#include "Utilities/sema.h"
#include "Utilities/mutex.h"
#include <vector>
#include <utility>
@ -324,7 +324,7 @@ struct lv2_socket final
lv2_socket(socket_type s);
~lv2_socket();
semaphore<> mutex;
shared_mutex mutex;
#ifdef _WIN32
// Remember events (WSAEnumNetworkEvents)

View File

@ -27,7 +27,7 @@ struct lv2_rwlock final : lv2_obj
const u64 name;
const s32 flags;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<s64> owner{0};
std::deque<cpu_thread*> rq;
std::deque<cpu_thread*> wq;

View File

@ -28,7 +28,7 @@ struct lv2_sema final : lv2_obj
const s32 flags;
const s32 max;
semaphore<> mutex;
shared_mutex mutex;
atomic_t<s32> val;
std::deque<cpu_thread*> sq;

View File

@ -215,7 +215,7 @@ struct lv2_obj
private:
// Scheduler mutex
static semaphore<> g_mutex;
static shared_mutex g_mutex;
// Scheduler queue for active PPU threads
static std::deque<class ppu_thread*> g_ppu;

View File

@ -24,7 +24,7 @@ struct lv2_timer_context : lv2_obj
void operator()();
void on_abort();
semaphore<> mutex;
shared_mutex mutex;
atomic_t<u32> state{SYS_TIMER_STATE_STOP};
std::weak_ptr<lv2_event_queue> port;

View File

@ -1,7 +1,7 @@
#pragma once
#include <list>
#include "Utilities/sema.h"
#include "Utilities/mutex.h"
// TODO: HLE info (constants, structs, etc.) should not be available here
@ -145,7 +145,7 @@ protected:
}
public:
semaphore<> mutex;
shared_mutex mutex;
virtual void Init(const u32 max_connect) = 0;
virtual ~MouseHandlerBase() = default;

View File

@ -13,7 +13,7 @@
#include <deque>
#include <mutex>
#include "Utilities/sema.h"
#include "Utilities/mutex.h"
extern fs::file g_tty;
extern atomic_t<s64> g_tty_size;