LV2: Optimizations and fixes
Fix and optimize sys_ppu_thread_yield.
Fix LV2 syscalls with timeout bug (use ppu_thread::cancel_sleep instead).
Move timeout notification out of mutex scope.
Allow g_waiting timeouts to be awoken in scope.
This commit is contained in:
parent cba4c3cdc4
commit c4cc0154be
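The timeout fix hinges on the new ppu_thread::cancel_sleep handshake: a syscall arms the flag (1) before trying to take ownership, awake_unlocked() promotes it to 2 when the wake-up arrives while the thread is still scheduled (before it actually slept), and the next lv2_obj::sleep call consumes it and degrades into a short yield instead of a real sleep. A minimal sketch of that three-state handshake follows; the names (sleep_token, arm_sleep, on_awake, consume_sleep) are illustrative, not the RPCS3 API, and std::atomic stands in for the plain s32 field that the scheduler mutex protects in the real code.

#include <atomic>
#include <cassert>

// Illustrative three-state flag modelled after ppu_thread::cancel_sleep:
// 0 = idle, 1 = armed before a blocking syscall, 2 = cancelled by an early wake-up.
struct sleep_token
{
    std::atomic<int> cancel_sleep{0};
};

void arm_sleep(sleep_token& t) // syscall path, before trying to own the object
{
    t.cancel_sleep.store(1, std::memory_order_relaxed);
}

void on_awake(sleep_token& t) // scheduler path: the thread is awoken before it actually slept
{
    int expected = 1;
    t.cancel_sleep.compare_exchange_strong(expected, 2, std::memory_order_relaxed);
}

bool consume_sleep(sleep_token& t) // sleep path: false means skip the real sleep and just yield
{
    return t.cancel_sleep.exchange(0, std::memory_order_relaxed) != 2;
}

int main()
{
    sleep_token t;
    arm_sleep(t);
    on_awake(t);               // the wake-up races ahead of the sleep call
    assert(!consume_sleep(t)); // the sleep is cancelled and becomes a short yield
}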
@@ -700,6 +700,12 @@ bool cpu_thread::check_state() noexcept
store = true;
}

if (flags & cpu_flag::notify)
{
flags -= cpu_flag::notify;
store = true;
}

// Can't process dbg_step if we only paused temporarily
if (cpu_can_stop && flags & cpu_flag::dbg_step)
{
@@ -779,6 +785,8 @@ bool cpu_thread::check_state() noexcept
if ((state1 ^ state) - pending_and_temp)
{
// Work could have changed flags
// Reset internal flags as if check_state() has just been called
cpu_sleep_called = false;
continue;
}
}
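check_state() now also consumes cpu_flag::notify, a flag set solely to force a wake-up (for example an expired LV2 timeout) without changing any other thread-state bits. A small self-contained sketch of consuming one bit atomically while leaving the rest untouched; std::atomic<uint32_t> and the flag constants are stand-ins, not the RPCS3 types.

#include <atomic>
#include <cassert>
#include <cstdint>

constexpr std::uint32_t flag_notify = 1u << 0; // stand-in for cpu_flag::notify
constexpr std::uint32_t flag_signal = 1u << 1; // any other flag that must survive

bool consume_notify(std::atomic<std::uint32_t>& state)
{
    std::uint32_t old = state.load();

    while (old & flag_notify)
    {
        if (state.compare_exchange_weak(old, old & ~flag_notify))
        {
            return true; // notify was set and has been cleared; other bits are untouched
        }
    }

    return false;
}

int main()
{
    std::atomic<std::uint32_t> state{flag_notify | flag_signal};
    assert(consume_notify(state));
    assert(state.load() == flag_signal); // signal survives, notify is gone
}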
@@ -24,6 +24,7 @@ enum class cpu_flag : u32
memory, // Thread must unlock memory mutex
pending, // Thread has postponed work
pending_recheck, // Thread needs to recheck if there is pending work before ::pending removal
notify, // Flag meant solely to allow atomic notification on state without changing other flags

dbg_global_pause, // Emulation paused
dbg_pause, // Thread paused
@@ -174,7 +175,7 @@ public:
virtual void cpu_sleep() {}

// Callback for cpu_flag::pending
virtual void cpu_work() {}
virtual void cpu_work() { state -= cpu_flag::pending + cpu_flag::pending_recheck; }

// Callback for cpu_flag::ret
virtual void cpu_return() {}
@@ -270,6 +270,8 @@ public:

alignas(64) const ppu_func_opd_t entry_func;
u64 start_time{0}; // Sleep start timepoint
u64 end_time{umax}; // Sleep end timepoint
s32 cancel_sleep{0}; // Flag to cancel the next lv2_obj::sleep call (when equals 2)
u64 syscall_args[8]{0}; // Last syscall arguments stored
const char* current_function{}; // Current function name for diagnosis, optimized for speed.
const char* last_function{}; // Sticky copy of current_function, is not cleared on function return
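The end_time member pairs with the clamped deadline computed in sleep_unlocked(): start_time + std::min<u64>(timeout, ~start_time), which saturates at the u64 maximum instead of wrapping, so umax stays reserved as the "no timeout" sentinel. A tiny worked example of that clamp, using plain uint64_t instead of the RPCS3 typedefs:

#include <algorithm>
#include <cassert>
#include <cstdint>

// ~start equals UINT64_MAX - start, so adding at most ~start can never overflow.
std::uint64_t deadline(std::uint64_t start, std::uint64_t timeout)
{
    return timeout ? start + std::min<std::uint64_t>(timeout, ~start) : UINT64_MAX;
}

int main()
{
    assert(deadline(100, 50) == 150);                      // normal case
    assert(deadline(UINT64_MAX - 10, 1000) == UINT64_MAX); // clamped instead of wrapping
    assert(deadline(123, 0) == UINT64_MAX);                // zero timeout means "infinite"
}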
@@ -1206,24 +1206,27 @@ namespace cpu_counter
void remove(cpu_thread*) noexcept;
}

void lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
{
// Should already be performed when using this flag
if (!g_postpone_notify_barrier)
{
prepare_for_sleep(cpu);
}

bool result = false;
const u64 current_time = get_guest_system_time();
{
std::lock_guard lock{g_mutex};
sleep_unlocked(cpu, timeout);

result = sleep_unlocked(cpu, timeout, current_time);

if (!g_to_awake.empty())
{
// Schedule pending entries
awake_unlocked({});
}

schedule_all();
schedule_all(current_time);
}

if (!g_postpone_notify_barrier)
@@ -1232,6 +1235,7 @@ void lv2_obj::sleep(cpu_thread& cpu, const u64 timeout)
}

g_to_awake.clear();
return result;
}
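As restructured above, lv2_obj::sleep() samples the guest clock once, does all queue manipulation under g_mutex, and leaves the actual wake-ups to be flushed outside the lock (right after it, or later through the notify barrier). A generic sketch of that defer-then-notify pattern, under assumed names (queue_mutex, to_notify, wake_ready) rather than the RPCS3 implementation:

#include <array>
#include <atomic>
#include <cstddef>
#include <mutex>

// Decide whom to wake while holding the lock, but notify only after releasing it,
// so the woken threads never immediately contend on the same mutex.
std::mutex queue_mutex;
std::array<std::atomic<int>*, 4> to_notify{}; // fixed slots, similar in spirit to g_to_notify

void wake_ready(std::atomic<int>* a, std::atomic<int>* b)
{
    std::size_t count = 0;
    {
        std::lock_guard lock{queue_mutex};
        // ... scheduling decisions would happen here ...
        to_notify[count++] = a;
        to_notify[count++] = b;
    }

    // Outside the critical section: publish the wake-up and notify.
    for (std::size_t i = 0; i < count; i++)
    {
        to_notify[i]->store(1);
        to_notify[i]->notify_one();
    }
}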
bool lv2_obj::awake(cpu_thread* thread, s32 prio)
@@ -1261,19 +1265,23 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio)

bool lv2_obj::yield(cpu_thread& thread)
{
vm::temporary_unlock(thread);

if (auto ppu = thread.try_get<ppu_thread>())
{
ppu->raddr = 0; // Clear reservation

if (!atomic_storage<ppu_thread*>::load(ppu->next_ppu))
{
// Nothing to do
return false;
}
}

return awake(&thread, yield_cmd);
}

void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)
bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
{
const u64 start_time = get_guest_system_time();
const u64 start_time = current_time;

auto on_to_sleep_update = [&]()
{
@@ -1299,15 +1307,32 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)
}
};

bool return_val = true;

if (auto ppu = thread.try_get<ppu_thread>())
{
ppu_log.trace("sleep() - waiting (%zu)", g_pending);

const auto [_ ,ok] = ppu->state.fetch_op([&](bs_t<cpu_flag>& val)
if (ppu->ack_suspend)
{
ppu->ack_suspend = false;
g_pending--;
}

if (std::exchange(ppu->cancel_sleep, 0) == 2)
{
// Signal that the underlying LV2 operation has been cancelled and replaced with a short yield
return_val = false;
}

const auto [_, ok] = ppu->state.fetch_op([&](bs_t<cpu_flag>& val)
{
if (!(val & cpu_flag::signal))
{
val += cpu_flag::suspend;

// Flag used for forced timeout notification
ensure(!timeout || !(val & cpu_flag::notify));
return true;
}

@@ -1316,8 +1341,8 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)

if (!ok)
{
ppu_log.trace("sleep() failed (signaled) (%s)", ppu->current_function);
return;
ppu_log.fatal("sleep() failed (signaled) (%s)", ppu->current_function);
return false;
}

// Find and remove the thread
@@ -1328,20 +1353,17 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)
g_to_sleep.erase(it);
ppu->start_time = start_time;
on_to_sleep_update();
return true;
}

// Already sleeping
ppu_log.trace("sleep(): called on already sleeping thread.");
return;
}

if (std::exchange(ppu->ack_suspend, false))
{
g_pending--;
return false;
}

ppu->raddr = 0; // Clear reservation
ppu->start_time = start_time;
ppu->end_time = timeout ? start_time + std::min<u64>(timeout, ~start_time) : u64{umax};
}
else if (auto spu = thread.try_get<spu_thread>())
{
@@ -1349,14 +1371,15 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)
{
g_to_sleep.erase(it);
on_to_sleep_update();
return true;
}

return;
return false;
}

if (timeout)
{
const u64 wait_until = start_time + timeout;
const u64 wait_until = start_time + std::min<u64>(timeout, ~start_time);

// Register timeout if necessary
for (auto it = g_waiting.cbegin(), end = g_waiting.cend();; it++)
@@ -1368,6 +1391,8 @@ void lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout)
}
}
}

return return_val;
}
bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
@@ -1403,59 +1428,37 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)

if (ppu == cpu)
{
auto ppu2_next = &ppu->next_ppu;
auto ppu2 = ppu->next_ppu;

if (auto next = +*ppu2_next; !next || next->prio != ppu->prio)
{
return false;
}

for (;; i++)
{
const auto next = +*ppu2_next;

if (auto next2 = +next->next_ppu; !next2 || next2->prio != ppu->prio)
{
break;
}

ppu2_next = &next->next_ppu;
}

if (ppu2_next == &ppu->next_ppu)
if (!ppu2 || ppu2->prio != ppu->prio)
{
// Empty 'same prio' threads list
return false;
}

auto ppu2 = +*ppu2_next;
for (i++;; i++)
{
const auto next = ppu2->next_ppu;

if (!next || next->prio != ppu->prio)
{
break;
}

ppu2 = next;
}

// Rotate current thread to the last position of the 'same prio' threads list
*ppu_next = ppu2;

// Exchange forward pointers
if (ppu->next_ppu != ppu2)
{
auto ppu2_val = +ppu2->next_ppu;
ppu2->next_ppu = +ppu->next_ppu;
ppu->next_ppu = ppu2_val;
*ppu2_next = ppu;
}
else
{
auto ppu2_val = +ppu2->next_ppu;
ppu2->next_ppu = ppu;
ppu->next_ppu = ppu2_val;
}
*ppu_next = std::exchange(ppu->next_ppu, std::exchange(ppu2->next_ppu, ppu));

if (i <= g_cfg.core.ppu_threads + 0u)
if (i < g_cfg.core.ppu_threads + 0u)
{
// Threads were rotated, but no context switch was made
return false;
}

ppu->start_time = get_guest_system_time();
cpu = nullptr; // Disable current thread enqueing, also enable threads list enqueing
break;
}
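The two pointer-swap branches above collapse into one statement, *ppu_next = std::exchange(ppu->next_ppu, std::exchange(ppu2->next_ppu, ppu)), which moves the current thread behind the last same-priority entry in a single pass. A standalone sketch of the same rotation on a toy singly linked list (node and rotate are illustrative names), covering both the adjacent and the whole-run case:

#include <cassert>
#include <utility>

struct node { node* next = nullptr; };

// Rotate 'first' to the position after 'last'; 'slot' is the pointer that currently
// refers to 'first' (mirrors *ppu_next / ppu / ppu2 in the diff above).
void rotate(node*& slot, node* first, node* last)
{
    slot = std::exchange(first->next, std::exchange(last->next, first));
}

int main()
{
    node a, b, c;           // list: a -> b -> c
    a.next = &b; b.next = &c;
    node* head = &a;
    rotate(head, &a, &b);   // adjacent case: becomes b -> a -> c
    assert(head == &b && b.next == &a && a.next == &c);

    node d, e, f;           // list: d -> e -> f
    d.next = &e; e.next = &f;
    node* head2 = &d;
    rotate(head2, &d, &f);  // whole-run case: becomes e -> f -> d
    assert(head2 == &e && f.next == &d && d.next == nullptr);
}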
@@ -1479,6 +1482,13 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
if (next == cpu)
{
ppu_log.trace("sleep() - suspended (p=%zu)", g_pending);

if (static_cast<ppu_thread*>(cpu)->cancel_sleep == 1)
{
// The next sleep call of the thread is cancelled
static_cast<ppu_thread*>(cpu)->cancel_sleep = 2;
}

return false;
}

@@ -1510,19 +1520,10 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
// Yield changed the queue before
bool changed_queue = prio == yield_cmd;

if (cpu)
if (cpu && prio != yield_cmd)
{
// Emplace current thread
if (!emplace_thread(cpu))
{
if (g_postpone_notify_barrier)
{
// This flag includes common optimizations regarding syscalls
// one of which is to allow a lock-free version of syscalls with awake behave as semaphore post: always notifies the thread, even if it hasn't slept yet
cpu->state += cpu_flag::signal;
}
}
else
if (emplace_thread(cpu))
{
changed_queue = true;
}
@@ -1530,35 +1531,16 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
else for (const auto _cpu : g_to_awake)
{
// Emplace threads from list
if (!emplace_thread(_cpu))
{
if (g_postpone_notify_barrier)
{
_cpu->state += cpu_flag::signal;
}
}
else
if (emplace_thread(_cpu))
{
changed_queue = true;
}
}

// Remove pending if necessary
if (g_pending && ((cpu && cpu == get_current_cpu_thread()) || prio == yield_cmd))
{
if (auto cur = cpu_thread::get_current<ppu_thread>())
{
if (std::exchange(cur->ack_suspend, false))
{
g_pending--;
}
}
}

auto target = +g_ppu;

// Suspend threads if necessary
for (usz i = 0, thread_count = g_cfg.core.ppu_threads; changed_queue && target; target = target->next_ppu, i++)
for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++)
{
if (i >= thread_count && cpu_flag::suspend - target->state)
{
@@ -1574,6 +1556,17 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
}
}

const auto current_ppu = cpu_thread::get_current<ppu_thread>();

// Remove pending if necessary
if (current_ppu)
{
if (std::exchange(current_ppu->ack_suspend, false))
{
ensure(g_pending)--;
}
}

return changed_queue;
}
@@ -1585,12 +1578,12 @@ void lv2_obj::cleanup()
g_pending = 0;
}

void lv2_obj::schedule_all()
void lv2_obj::schedule_all(u64 current_time)
{
usz notify_later_idx = 0;

if (!g_pending && g_to_sleep.empty())
{
usz notify_later_idx = 0;

auto target = +g_ppu;

// Wake up threads
@@ -1602,8 +1595,9 @@ void lv2_obj::schedule_all()
target->state ^= (cpu_flag::signal + cpu_flag::suspend);
target->start_time = 0;

if (notify_later_idx >= std::size(g_to_notify) - 1)
if (notify_later_idx == std::size(g_to_notify))
{
// Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::signal + cpu_flag::suspend);
}
else
@@ -1612,19 +1606,39 @@ void lv2_obj::schedule_all()
}
}
}

g_to_notify[notify_later_idx] = nullptr;
}

// Check registered timeouts
while (!g_waiting.empty())
{
auto& pair = g_waiting.front();
const auto pair = &g_waiting.front();

if (pair.first <= get_guest_system_time())
if (!current_time)
{
pair.second->notify();
current_time = get_guest_system_time();
}

if (pair->first <= current_time)
{
const auto target = pair->second;
g_waiting.pop_front();

if (target != cpu_thread::get_current())
{
// Change cpu_thread::state for the lightweight notification to work
ensure(!target->state.test_and_set(cpu_flag::notify));

// Otherwise notify it to wake itself
if (notify_later_idx == std::size(g_to_notify))
{
// Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::notify);
}
else
{
g_to_notify[notify_later_idx++] = &target->state;
}
}
}
else
{
@@ -1632,6 +1646,12 @@ void lv2_obj::schedule_all()
break;
}
}

if (notify_later_idx - 1 < std::size(g_to_notify) - 1)
{
// Null-terminate the list if it ends before last slot
g_to_notify[notify_later_idx] = nullptr;
}
}
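schedule_all() now wakes expired g_waiting entries in scope: each expired waiter gets cpu_flag::notify set while the scheduler lock is held and its state is queued into g_to_notify (or notified on the spot once the slots run out), instead of calling notify() per entry as before. A condensed sketch of that drain loop under assumed names (waiting, to_notify, drain_expired), with std::atomic<int> standing in for the thread state:

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <utility>

std::deque<std::pair<std::uint64_t, std::atomic<int>*>> waiting; // sorted by deadline
std::array<std::atomic<int>*, 4> to_notify{};                    // deferred wake-ups

void drain_expired(std::uint64_t now)
{
    std::size_t idx = 0;

    while (!waiting.empty() && waiting.front().first <= now)
    {
        std::atomic<int>* target = waiting.front().second;
        waiting.pop_front();

        target->fetch_or(1); // mark the forced timeout (cpu_flag::notify analogue)

        if (idx == to_notify.size())
        {
            target->notify_one();      // out of slots: notify immediately
        }
        else
        {
            to_notify[idx++] = target; // otherwise defer until the lock is released
        }
    }

    if (idx < to_notify.size())
    {
        to_notify[idx] = nullptr;      // null-terminate the deferred list
    }
}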
ppu_thread_status lv2_obj::ppu_state(ppu_thread* ppu, bool lock_idm, bool lock_lv2)
@@ -1715,3 +1735,109 @@ bool lv2_obj::has_ppus_in_running_state()

return false;
}

bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep)
{
static_assert(u64{umax} / max_timeout >= 100, "max timeout is not valid for scaling");

const u64 start_time = get_system_time();

if (cpu)
{
if (u64 end_time = cpu->end_time; end_time != umax)
{
const u64 guest_start = get_guest_system_time(start_time);

if (end_time <= guest_start)
{
return true;
}

usec = end_time - guest_start;
scale = true;
}
}

if (scale)
{
// Scale time
usec = std::min<u64>(usec, u64{umax} / 100) * 100 / g_cfg.core.clocks_scale;
}

// Clamp
usec = std::min<u64>(usec, max_timeout);

u64 passed = 0;

atomic_bs_t<cpu_flag> dummy{};
const auto& state = cpu ? cpu->state : dummy;
auto old_state = +state;

auto wait_for = [&](u64 timeout)
{
thread_ctrl::wait_on(state, old_state, timeout);
};

for (;; old_state = state)
{
if (old_state & cpu_flag::notify)
{
// Timeout notification has been forced
break;
}

if (old_state & cpu_flag::signal)
{
return false;
}

if (::is_stopped(old_state) || thread_ctrl::state() == thread_state::aborting)
{
return passed >= usec;
}

if (passed >= usec)
{
break;
}

u64 remaining = usec - passed;
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500;
#endif
// TODO: Tune for other non windows operating sytems

if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
{
wait_for(remaining);
}
else
{
if (remaining > host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
wait_for(remaining - (remaining % host_min_quantum));
#endif
}
// TODO: Determine best value for yield delay
else
{
// Try yielding. May cause long wake latency but helps weaker CPUs a lot by alleviating resource pressure
std::this_thread::yield();
}
}

passed = get_system_time() - start_time;
}

return true;
}
@@ -162,6 +162,8 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)

lv2_obj::prepare_for_sleep(ppu);

ppu.cancel_sleep = 1;

if (s32 signal = mutex.try_own(&ppu))
{
if (signal == smin)
@@ -169,12 +171,13 @@ error_code _sys_lwmutex_lock(ppu_thread& ppu, u32 lwmutex_id, u64 timeout)
ppu.gpr[3] = CELL_EBUSY;
}

ppu.cancel_sleep = 0;
return true;
}

mutex.sleep(ppu, timeout);
const bool finished = !mutex.sleep(ppu, timeout);
notify.cleanup();
return false;
return finished;
});

if (!mutex)
@@ -135,7 +135,7 @@ struct lv2_lwmutex final : lv2_obj
control_data_t store = old;
store.signaled |= (unlock2 ? s32{smin} : 1);

if (lv2_control.compare_and_swap_test(old, store))
if (lv2_control.compare_exchange(old, store))
{
return true;
}
@@ -162,15 +162,19 @@ error_code sys_mutex_lock(ppu_thread& ppu, u32 mutex_id, u64 timeout)
{
lv2_obj::prepare_for_sleep(ppu);

if (mutex.try_own(ppu))
ppu.cancel_sleep = 1;

if (mutex.try_own(ppu) || !mutex.sleep(ppu, timeout))
{
result = {};
}
else

if (ppu.cancel_sleep != 1)
{
mutex.sleep(ppu, timeout);
notify.cleanup();
}

ppu.cancel_sleep = 0;
}

return result;
@@ -225,7 +225,7 @@ public:

private:
// Remove the current thread from the scheduling queue, register timeout
static void sleep_unlocked(cpu_thread&, u64 timeout);
static bool sleep_unlocked(cpu_thread&, u64 timeout, u64 current_time);

// Schedule the thread
static bool awake_unlocked(cpu_thread*, s32 prio = enqueue_cmd);
@@ -233,7 +233,7 @@ private:
public:
static constexpr u64 max_timeout = u64{umax} / 1000;

static void sleep(cpu_thread& cpu, const u64 timeout = 0);
static bool sleep(cpu_thread& cpu, const u64 timeout = 0);

static bool awake(cpu_thread* thread, s32 prio = enqueue_cmd);

@@ -406,95 +406,7 @@ public:
return make;
}

template <bool IsUsleep = false, bool Scale = true>
static bool wait_timeout(u64 usec, cpu_thread* const cpu = {})
{
static_assert(u64{umax} / max_timeout >= 100, "max timeout is not valid for scaling");

if constexpr (Scale)
{
// Scale time
usec = std::min<u64>(usec, u64{umax} / 100) * 100 / g_cfg.core.clocks_scale;
}

// Clamp
usec = std::min<u64>(usec, max_timeout);

u64 passed = 0;

const u64 start_time = get_system_time();

auto wait_for = [cpu](u64 timeout)
{
atomic_bs_t<cpu_flag> dummy{};
auto& state = cpu ? cpu->state : dummy;
const auto old = +state;

if (old & cpu_flag::signal)
{
return true;
}

thread_ctrl::wait_on(state, old, timeout);
return false;
};

while (usec >= passed)
{
u64 remaining = usec - passed;
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = IsUsleep && remaining <= 1000 ? 10 : 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500;
#endif
// TODO: Tune for other non windows operating sytems
bool escape = false;
if (g_cfg.core.sleep_timers_accuracy < (IsUsleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
{
escape = wait_for(remaining);
}
else
{
if (remaining > host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
escape = wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
escape = wait_for(remaining - (remaining % host_min_quantum));
#endif
}
else
{
// Try yielding. May cause long wake latency but helps weaker CPUs a lot by alleviating resource pressure
std::this_thread::yield();
}
}

if (auto cpu0 = get_current_cpu_thread(); cpu0 && cpu0->is_stopped())
{
return false;
}

if (thread_ctrl::state() == thread_state::aborting)
{
return false;
}

if (escape)
{
return false;
}

passed = get_system_time() - start_time;
}

return true;
}
static bool wait_timeout(u64 usec, ppu_thread* cpu = {}, bool scale = true, bool is_usleep = false);

static inline void notify_all()
{
@@ -502,9 +414,7 @@ public:
{
if (!cpu)
{
g_to_notify[0] = nullptr;
g_postpone_notify_barrier = false;
return;
break;
}

if (cpu != &g_to_notify)
@@ -514,6 +424,9 @@ public:
atomic_wait_engine::notify_one(cpu, 4, atomic_wait::default_mask<atomic_bs_t<cpu_flag>>);
}
}

g_to_notify[0] = nullptr;
g_postpone_notify_barrier = false;
}

// Can be called before the actual sleep call in order to move it out of mutex scope
@@ -542,7 +455,8 @@ public:
}

// While IDM mutex is still locked (this function assumes so) check if the notification is still needed
if (cpu != &g_to_notify && !static_cast<const decltype(cpu_thread::state)*>(cpu)->all_of(cpu_flag::signal + cpu_flag::wait))
// Pending flag is meant for forced notification (if the CPU really has pending work it can restore the flag in theory)
if (cpu != &g_to_notify && static_cast<const decltype(cpu_thread::state)*>(cpu)->none_of(cpu_flag::signal + cpu_flag::pending))
{
// Omit it (this is a void pointer, it can hold anything)
cpu = &g_to_notify;
@@ -575,5 +489,5 @@ private:
// If a notify_all_t object exists locally, postpone notifications to the destructor of it (not recursive, notifies on the first destructor for safety)
static thread_local bool g_postpone_notify_barrier;

static void schedule_all();
static void schedule_all(u64 current_time = 0);
};
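For the call sites below, the old template arguments map onto the new trailing runtime parameters: wait_timeout<IsUsleep, Scale>(usec, cpu) becomes wait_timeout(usec, cpu, scale, is_usleep). Note that sys_timer_usleep now also passes &ppu, so the new end_time/cpu_flag::notify path applies to it, while the RSX wait keeps passing nullptr.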
@@ -409,7 +409,7 @@ error_code sys_timer_usleep(ppu_thread& ppu, u64 sleep_time)
{
lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy < sleep_timers_accuracy_level::_usleep ? sleep_time : 0);

if (!lv2_obj::wait_timeout<true>(sleep_time))
if (!lv2_obj::wait_timeout(sleep_time, &ppu, true, true))
{
ppu.state += cpu_flag::again;
}
@@ -3404,7 +3404,7 @@ namespace rsx
if (target_rsx_flip_time > time + 1000)
{
const auto delay_us = target_rsx_flip_time - time;
lv2_obj::wait_timeout<false, false>(delay_us);
lv2_obj::wait_timeout(delay_us, nullptr, false);
performance_counters.idle_time += delay_us;
}
}