Rewrite cond_variable to use waitable atomics

Increase max_timeout and fix max_timeout usage

commit d13ff285d1 (parent 67f31c17d1)
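For context: "waitable atomics" means blocking directly on an atomic variable's value, in the style of C++20 std::atomic::wait/notify (the diff below uses the project's atomic_t, which exposes the same wait/notify_one/notify_all operations). A minimal sketch of the pattern with plain std::atomic, names illustrative and not from the commit:

#include <atomic>

std::atomic<unsigned> state{0};

void waiter()
{
	unsigned old = state.load();

	while (old == 0)
	{
		state.wait(old);    // blocks while state == old; may wake spuriously
		old = state.load(); // re-read, then re-check the predicate
	}
}

void signaler()
{
	state.fetch_add(1); // publish the change first...
	state.notify_one(); // ...then wake one waiter
}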
@@ -1778,11 +1778,13 @@ void thread_base::finalize() noexcept
 	--g_thread_count;
 }
 
-bool thread_ctrl::_wait_for(u64 usec)
+void thread_ctrl::_wait_for(u64 usec)
 {
 	auto _this = g_tls_this_thread;
 
-	do
+	std::unique_lock lock(_this->m_mutex, std::defer_lock);
+
+	while (true)
 	{
 		// Mutex is unlocked at the start and after the waiting
 		if (u32 sig = _this->m_signal.load())
@@ -1790,17 +1792,20 @@ bool thread_ctrl::_wait_for(u64 usec)
 			if (sig & 1)
 			{
 				_this->m_signal &= ~1;
-				return true;
+				return;
 			}
 		}
 
 		if (usec == 0)
 		{
 			// No timeout: return immediately
-			return false;
+			return;
 		}
 
-		_this->m_mutex.lock();
+		if (!lock)
+		{
+			lock.lock();
+		}
 
 		// Double-check the value
 		if (u32 sig = _this->m_signal.load())
@@ -1808,15 +1813,17 @@ bool thread_ctrl::_wait_for(u64 usec)
 			if (sig & 1)
 			{
 				_this->m_signal &= ~1;
-				_this->m_mutex.unlock();
-				return true;
+				return;
 			}
 		}
-	}
-	while (_this->m_cond.wait_unlock(std::exchange(usec, usec > cond_variable::max_timeout ? -1 : 0), _this->m_mutex));
 
-	// Timeout
-	return false;
+		_this->m_cond.wait_unlock(usec, lock);
+
+		if (usec < cond_variable::max_timeout)
+		{
+			usec = 0;
+		}
+	}
 }
 
 thread_base::thread_base(std::string_view name)
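Two behavioral notes on the rewritten loop: the mutex is now acquired through a deferred std::unique_lock, so the first iteration can inspect m_signal without locking; and after one bounded wait, usec is reset to 0 when it was below max_timeout, so the next pass returns instead of waiting again (timeouts are single-shot, while values at or above max_timeout keep waiting indefinitely). A small illustration of the std::defer_lock idiom used above (mutex name illustrative):

#include <mutex>

std::mutex m;

void locked_section()
{
	std::unique_lock lock(m, std::defer_lock); // constructed without locking

	if (!lock) // operator bool: false until this lock owns the mutex
	{
		lock.lock(); // acquire on first need; the destructor unlocks if owned
	}
}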
@@ -185,7 +185,7 @@ class thread_ctrl final
 	static atomic_t<native_core_arrangement> g_native_core_layout;
 
 	// Internal waiting function, may throw. Infinite value is -1.
-	static bool _wait_for(u64 usec);
+	static void _wait_for(u64 usec);
 
 	friend class thread_base;
 
@@ -235,9 +235,9 @@ public:
 	}
 
 	// Wait once with timeout. May spuriously return false.
-	static inline bool wait_for(u64 usec)
+	static inline void wait_for(u64 usec)
 	{
-		return _wait_for(usec);
+		_wait_for(usec);
 	}
 
 	// Wait.
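Because wait_for() no longer reports whether a signal arrived, callers are expected to keep their own predicate and loop. A hypothetical caller sketch (ready() is illustrative, not part of the codebase):

while (!ready())
{
	thread_ctrl::wait_for(10000); // wait up to 10000 usec; may return early or spuriously
}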
@@ -8,50 +8,57 @@
 #include <thread>
 #endif
 
-bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
+// use constants, increase signal space
+
+void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
 {
-	verify("cond_variable overflow" HERE), (_old & 0xffff) != 0xffff; // Very unlikely: it requires 65535 distinct threads to wait simultaneously
+	// Not supposed to fail
+	verify(HERE), _old;
 
-	return balanced_wait_until(m_value, _timeout, [&](u32& value, auto... ret) -> int
+	// Wait with timeout
+	m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? UINT64_MAX : _timeout * 1000});
+
+	// Cleanup
+	m_value.atomic_op([](u32& value)
 	{
-		if (value >> 16)
-		{
-			// Success
-			value -= 0x10001;
-			return +1;
-		}
+		value -= c_waiter_mask & -c_waiter_mask;
 
-		if constexpr (sizeof...(ret))
+		if ((value & c_waiter_mask) == 0)
 		{
-			// Retire
-			value -= 1;
-			return -1;
+			// Last waiter removed, clean signals
+			value = 0;
 		}
-
-		return 0;
 	});
-
-#ifdef _WIN32
-	if (_old >= 0x10000 && !OptWaitOnAddress && m_value)
-	{
-		// Workaround possibly stolen signal
-		imp_wake(1);
-	}
-#endif
 }
 
 void cond_variable::imp_wake(u32 _count) noexcept
 {
-	balanced_awaken<true>(m_value, m_value.atomic_op([&](u32& value) -> u32
+	// TODO (notify_one)
+	const auto [_old, ok] = m_value.fetch_op([](u32& value)
 	{
-		// Subtract already signaled number from total amount of waiters
-		const u32 can_sig = (value & 0xffff) - (value >> 16);
-		const u32 num_sig = std::min<u32>(can_sig, _count);
-
-		value += num_sig << 16;
-		return num_sig;
-	}));
+		if (!value || (value & c_signal_mask) == c_signal_mask)
+		{
+			return false;
+		}
+
+		// Add signal
+		value += c_signal_mask & -c_signal_mask;
+		return true;
+	});
+
+	if (!ok || !_count)
+	{
+		return;
+	}
+
+	if (_count > 1 || ((_old + (c_signal_mask & -c_signal_mask)) & c_signal_mask) == c_signal_mask)
+	{
+		// Resort to notify_all if signal count reached max
+		m_value.notify_all();
+	}
+	else
+	{
+		m_value.notify_one();
+	}
 }
 
 bool shared_cond::imp_wait(u32 slot, u64 _timeout) noexcept
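The mask & -mask expression used in both functions isolates the lowest set bit of a mask, i.e. the unit by which the corresponding bit field is incremented: the low 13 bits of m_value count waiters (up to 8191) and the high 19 bits count pending signals. A self-contained check with the constants from cond.h:

constexpr unsigned c_waiter_mask = 0x1fff;
constexpr unsigned c_signal_mask = 0xffffffff & ~c_waiter_mask;

// Lowest set bit of each mask = increment unit of that field
static_assert((c_waiter_mask & -c_waiter_mask) == 0x1);    // +1 waiter
static_assert((c_signal_mask & -c_signal_mask) == 0x2000); // +1 signal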
@@ -11,9 +11,31 @@ class cond_variable
 	// Internal waiter counter
 	atomic_t<u32> m_value{0};
 
+	enum : u32
+	{
+		c_waiter_mask = 0x1fff,
+		c_signal_mask = 0xffffffff & ~c_waiter_mask,
+	};
+
 protected:
+	// Increment waiter count
+	u32 add_waiter() noexcept
+	{
+		return m_value.atomic_op([](u32& value) -> u32
+		{
+			if ((value & c_signal_mask) == c_signal_mask || (value & c_waiter_mask) == c_waiter_mask)
+			{
+				// Signal or waiter overflow, return immediately
+				return 0;
+			}
+
+			value += c_waiter_mask & -c_waiter_mask;
+			return value;
+		});
+	}
+
 	// Internal waiting function
-	bool imp_wait(u32 _old, u64 _timeout) noexcept;
+	void imp_wait(u32 _old, u64 _timeout) noexcept;
 
 	// Try to notify up to _count threads
 	void imp_wake(u32 _count) noexcept;
@@ -23,22 +45,33 @@ public:
 
 	// Intrusive wait algorithm for lockable objects
 	template <typename T>
-	bool wait(T& object, u64 usec_timeout = -1)
+	void wait(T& object, u64 usec_timeout = -1) noexcept
 	{
-		const u32 _old = m_value.fetch_add(1); // Increment waiter counter
+		const u32 _old = add_waiter();
+
+		if (!_old)
+		{
+			return;
+		}
+
 		object.unlock();
-		const bool res = imp_wait(_old, usec_timeout);
+		imp_wait(_old, usec_timeout);
 		object.lock();
-		return res;
 	}
 
 	// Unlock all specified objects but don't lock them again
 	template <typename... Locks>
-	bool wait_unlock(u64 usec_timeout, Locks&&... locks)
+	void wait_unlock(u64 usec_timeout, Locks&&... locks)
 	{
-		const u32 _old = m_value.fetch_add(1); // Increment waiter counter
+		const u32 _old = add_waiter();
 		(..., std::forward<Locks>(locks).unlock());
-		return imp_wait(_old, usec_timeout);
+
+		if (!_old)
+		{
+			return;
+		}
+
+		imp_wait(_old, usec_timeout);
 	}
 
 	// Wake one thread
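A hypothetical usage sketch of the intrusive wait() (queue, mutex and function names are illustrative; any object with lock()/unlock() qualifies), assuming cond.h is included:

#include <deque>
#include <mutex>

std::mutex mtx;
std::deque<int> queue;
cond_variable cv;

int pop_one()
{
	std::unique_lock lock(mtx);

	while (queue.empty())
	{
		// Unlocks `lock`, waits up to 10000 usec on the atomic value, relocks.
		// Wakeups may be spurious, hence the predicate loop.
		cv.wait(lock, 10000);
	}

	const int v = queue.front();
	queue.pop_front();
	return v;
}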
@@ -55,11 +88,11 @@ public:
 	{
 		if (m_value)
 		{
-			imp_wake(65535);
+			imp_wake(-1);
 		}
 	}
 
-	static constexpr u64 max_timeout = u64{UINT32_MAX} / 1000 * 1000000;
+	static constexpr u64 max_timeout = UINT64_MAX / 1000;
 };
 
 // Condition variable fused with a pseudo-mutex supporting only reader locks (up to 32 readers).
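On the max_timeout change itself: the old cap, u64{UINT32_MAX} / 1000 * 1000000 = 4294967 * 1000000, is about 4.29e12 usec, roughly 50 days; the new UINT64_MAX / 1000 is about 1.84e16 usec, several centuries, so it is effectively unbounded. The divisor of 1000 matches the usec-to-nanosecond conversion in imp_wait (_timeout * 1000): any _timeout up to max_timeout can be multiplied by 1000 without overflowing u64.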
@@ -236,10 +236,10 @@ public:
 		static_assert(UINT64_MAX / cond_variable::max_timeout >= g_cfg.core.clocks_scale.max, "timeout may overflow during scaling");
 
 		// Clamp to max timeout accepted
-		if (usec > cond_variable::max_timeout) usec = cond_variable::max_timeout;
+		const u64 max_usec = cond_variable::max_timeout * 100 / g_cfg.core.clocks_scale.max;
 
 		// Now scale the result
-		usec = (usec * g_cfg.core.clocks_scale) / 100;
+		usec = (std::min<u64>(usec, max_usec) * g_cfg.core.clocks_scale) / 100;
 
 #ifdef __linux__
 		// TODO: Confirm whether Apple or any BSD can benefit from this as well
@@ -271,7 +271,7 @@ public:
 		// Do not wait for the last quantum to avoid loss of accuracy
 		thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
 #else
 		// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
 		thread_ctrl::wait_for(remaining - (remaining % host_min_quantum));
 #endif
 	}
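The clamp rework is the "fix max_timeout usage" part: the old code clamped usec to max_timeout and then multiplied by clocks_scale / 100, so any scale above 100 could push the product past max_timeout and toward overflow. The new code bounds usec before scaling: with max_usec = max_timeout * 100 / clocks_scale.max and clocks_scale <= clocks_scale.max, the result min(usec, max_usec) * clocks_scale / 100 never exceeds max_timeout, and the static_assert guarantees the multiplication itself cannot wrap. A numeric sanity check (a scale maximum of 1000 is an assumption here, chosen to satisfy that static_assert):

constexpr unsigned long long max_timeout = ~0ull / 1000;
constexpr unsigned long long scale_max = 1000; // assumed config maximum (percent)
constexpr unsigned long long max_usec = max_timeout * 100 / scale_max;

static_assert(max_usec * scale_max / 100 <= max_timeout); // scaled result stays in range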