diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 68f6eefb04..659f52fc7c 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -739,6 +739,12 @@ bool cpu_thread::check_state() noexcept
 			cpu_counter::add(this);
 		}
 
+		if (state & cpu_flag::pending)
+		{
+			// Execute pending work
+			cpu_work();
+		}
+
 		if (retval)
 		{
 			cpu_on_stop();
diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h
index 6172432bb3..b944d67d82 100644
--- a/rpcs3/Emu/CPU/CPUThread.h
+++ b/rpcs3/Emu/CPU/CPUThread.h
@@ -21,6 +21,7 @@ enum class cpu_flag : u32
 	ret, // Callback return requested
 	signal, // Thread received a signal (HLE)
 	memory, // Thread must unlock memory mutex
+	pending, // Thread has postponed work
 
 	dbg_global_pause, // Emulation paused
 	dbg_pause, // Thread paused
@@ -169,6 +170,9 @@ public:
 	// Callback for cpu_flag::suspend
 	virtual void cpu_sleep() {}
 
+	// Callback for cpu_flag::pending
+	virtual void cpu_work() {}
+
 	// Callback for cpu_flag::ret
 	virtual void cpu_return() {}
 
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp
index 2660f997ce..045105d441 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.cpp
+++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp
@@ -116,7 +116,10 @@ void spu_interpreter::set_interrupt_status(spu_thread& spu, spu_opcode_t op)
 		spu.set_interrupt_status(false);
 	}
 
-	spu.check_mfc_interrupts(spu.pc);
+	if (spu.check_mfc_interrupts(spu.pc) && spu.state & cpu_flag::pending)
+	{
+		spu.do_mfc();
+	}
 }
 
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index ec8a6a2e98..c61aa18173 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1655,6 +1655,18 @@ void spu_thread::cpu_task()
 	}
 }
 
+void spu_thread::cpu_work()
+{
+	const auto timeout = +g_cfg.core.mfc_transfers_timeout;
+
+	// If either MFC size exceeds limit or timeout has been reached, execute pending MFC commands
+	if (mfc_size > g_cfg.core.mfc_transfers_shuffling || (timeout && get_system_time() - mfc_last_timestamp >= timeout))
+	{
+		do_mfc(false);
+		check_mfc_interrupts(pc + 4);
+	}
+}
+
 struct raw_spu_cleanup
 {
 	raw_spu_cleanup() = default;
@@ -2948,14 +2960,15 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
 	vm::reservation_notifier(addr).notify_all(-128);
 }
 
-void spu_thread::do_mfc(bool /*wait*/)
+void spu_thread::do_mfc(bool can_escape)
 {
 	u32 removed = 0;
 	u32 barrier = 0;
 	u32 fence = 0;
+	u16 exec_mask = 0;
+	bool pending = false;
 
-	// Process enqueued commands
-	static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args)
+	auto process_command = [&](spu_mfc_cmd& args)
 	{
 		// Select tag bit in the tag mask or the stall mask
 		const u32 mask = utils::rol32(1, args.tag);
@@ -2992,6 +3005,20 @@ void spu_thread::do_mfc(bool can_escape)
 			return false;
 		}
 
+		// If command is not enabled in execution mask, execute it later
+		if (!(exec_mask & (1u << (&args - mfc_queue))))
+		{
+			if (args.cmd & MFC_BARRIER_MASK)
+			{
+				barrier |= mask;
+			}
+
+			// Fence is set for any command
+			fence |= mask;
+			pending = true;
+			return false;
+		}
+
 		if (args.cmd & MFC_LIST_MASK)
 		{
 			if (!(args.tag & 0x80))
@@ -3028,31 +3055,63 @@ void spu_thread::do_mfc(bool can_escape)
 
 		removed++;
 		return true;
-	}));
+	};
 
-	mfc_size -= removed;
-	mfc_barrier = barrier;
-	mfc_fence = fence;
-
-	if (removed && ch_tag_upd)
+	auto get_exec_mask = [&size = mfc_size]
 	{
-		const u32 completed = get_mfc_completed();
+		// Get commands' execution mask
+		// Mask bits are always set when mfc_transfers_shuffling is 0
+		return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | __rdtsc());
+	};
 
-		if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
+	// Process enqueued commands
+	while (true)
+	{
+		removed = 0;
+		barrier = 0;
+		fence = 0;
+
+		// Shuffle commands execution (if enabled), explicit barriers are obeyed
+		pending = false;
+		exec_mask = get_exec_mask();
+
+		static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, process_command));
+
+		mfc_size -= removed;
+		mfc_barrier = barrier;
+		mfc_fence = fence;
+
+		if (removed && ch_tag_upd)
 		{
-			ch_tag_stat.set_value(completed);
-			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
+			const u32 completed = get_mfc_completed();
+
+			if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
+			{
+				ch_tag_stat.set_value(completed);
+				ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
+			}
+			else if (completed == ch_tag_mask && ch_tag_upd == MFC_TAG_UPDATE_ALL)
+			{
+				ch_tag_stat.set_value(completed);
+				ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
+			}
 		}
-		else if (completed == ch_tag_mask && ch_tag_upd == MFC_TAG_UPDATE_ALL)
+
+		if (can_escape && check_mfc_interrupts(pc + 4))
 		{
-			ch_tag_stat.set_value(completed);
-			ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
+			spu_runtime::g_escape(this);
+		}
+
+		if (!pending)
+		{
+			break;
 		}
 	}
 
-	if (check_mfc_interrupts(pc + 4))
+	if (state & cpu_flag::pending)
 	{
-		spu_runtime::g_escape(this);
+		// No more pending work
+		state -= cpu_flag::pending;
 	}
 }
@@ -3109,6 +3168,15 @@ bool spu_thread::process_mfc_cmd()
 	// Stall infinitely if MFC queue is full
 	while (mfc_size >= 16) [[unlikely]]
 	{
+		// Reset MFC timestamp in the case of full queue
+		mfc_last_timestamp = 0;
+
+		// Process MFC commands
+		if (!test_stopped())
+		{
+			return false;
+		}
+
 		auto old = state.add_fetch(cpu_flag::wait);
 
 		if (is_stopped(old))
@@ -3382,12 +3450,18 @@ bool spu_thread::process_mfc_cmd()
 	{
 		if (do_dma_check(ch_mfc_cmd)) [[likely]]
 		{
-			if (ch_mfc_cmd.size)
+			if (!g_cfg.core.mfc_transfers_shuffling)
 			{
-				do_dma_transfer(this, ch_mfc_cmd, ls);
+				if (ch_mfc_cmd.size)
+				{
+					do_dma_transfer(this, ch_mfc_cmd, ls);
+				}
+
+				return true;
 			}
 
-			return true;
+			if (!state.test_and_set(cpu_flag::pending))
+				mfc_last_timestamp = get_system_time();
 		}
 
 		mfc_queue[mfc_size++] = ch_mfc_cmd;
@@ -3429,9 +3503,17 @@ bool spu_thread::process_mfc_cmd()
 
 		if (do_dma_check(cmd)) [[likely]]
 		{
-			if (!cmd.size || do_list_transfer(cmd)) [[likely]]
+			if (!g_cfg.core.mfc_transfers_shuffling)
 			{
-				return true;
+				if (!cmd.size || do_list_transfer(cmd)) [[likely]]
+				{
+					return true;
+				}
+			}
+			else
+			{
+				if (!state.test_and_set(cpu_flag::pending))
+					mfc_last_timestamp = get_system_time();
 			}
 		}
@@ -3445,6 +3527,7 @@ bool spu_thread::process_mfc_cmd()
 
 	if (check_mfc_interrupts(pc + 4))
 	{
+		do_mfc(false);
 		spu_runtime::g_escape(this);
 	}
 
@@ -3714,6 +3797,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 			state += cpu_flag::wait + cpu_flag::temp;
 		}
 
+		if (state & cpu_flag::pending)
+		{
+			do_mfc();
+		}
+
 		for (int i = 0; i < 10 && channel.get_count() == 0; i++)
 		{
 			busy_wait();
@@ -3739,6 +3827,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 		while (true)
 		{
+			if (state & cpu_flag::pending)
+			{
+				do_mfc();
+			}
+
 			for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
 			{
 				busy_wait();
@@ -3770,13 +3863,17 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 	case MFC_RdTagStat:
 	{
+		if (state & cpu_flag::pending)
+		{
+			do_mfc();
+		}
+
 		if (u32 out; ch_tag_stat.try_read(out))
 		{
 			ch_tag_stat.set_value(0, false);
 			return out;
 		}
 
-		// Will stall infinitely
 		return read_channel(ch_tag_stat);
 	}
 
@@ -3929,6 +4026,11 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 	{
 		if (get_type() >= spu_type::raw)
 		{
+			if (state & cpu_flag::pending)
+			{
+				do_mfc();
+			}
+
 			if (ch_out_intr_mbox.get_count())
 			{
 				state += cpu_flag::wait;
@@ -4060,6 +4162,11 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 
 	case SPU_WrOutMbox:
 	{
+		if (state & cpu_flag::pending)
+		{
+			do_mfc();
+		}
+
 		if (ch_out_mbox.get_count())
 		{
 			state += cpu_flag::wait;
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index d104626cbe..5047676b05 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -631,6 +631,7 @@ public:
 	virtual std::string dump_misc() const override;
 	virtual void cpu_task() override final;
 	virtual void cpu_return() override;
+	virtual void cpu_work() override;
 	virtual ~spu_thread() override;
 	void cleanup();
 	void cpu_init();
@@ -668,6 +669,9 @@ public:
 	u32 mfc_barrier = -1;
 	u32 mfc_fence = -1;
 
+	// Timestamp of the first postponed command (transfers shuffling related)
+	u64 mfc_last_timestamp = 0;
+
 	// MFC proxy command data
 	spu_mfc_cmd mfc_prxy_cmd;
 	shared_mutex mfc_prxy_mtx;
@@ -787,7 +791,7 @@ public:
 	bool do_list_transfer(spu_mfc_cmd& args);
 	void do_putlluc(const spu_mfc_cmd& args);
 	bool do_putllc(const spu_mfc_cmd& args);
-	void do_mfc(bool wait = true);
+	void do_mfc(bool can_escape = true);
 	u32 get_mfc_completed() const;
 	bool process_mfc_cmd();
diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h
index 83eb1aa3d2..060b7387ae 100644
--- a/rpcs3/Emu/system_config.h
+++ b/rpcs3/Emu/system_config.h
@@ -47,6 +47,8 @@ struct cfg_root : cfg::node
 		cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
 		cfg::_bool spu_cache{ this, "SPU Cache", true };
 		cfg::_bool spu_prof{ this, "SPU Profiler", false };
+		cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Transfers Shuffling Max Commands", 0 };
+		cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Transfers Timeout", 0, true };
 		cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", has_rtm() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
 		cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
 		cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
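
For reference, a minimal standalone sketch of the execution-mask trick that get_exec_mask() above relies on; make_exec_mask, its parameters and the printed values are illustrative only, not part of the patch (the patch itself derives the entropy from __rdtsc() and the limit from the "MFC Transfers Shuffling Max Commands" setting). With n = min(limit, queue size), the expression 0 - (1u << n) sets every bit at position n and above, so queue slots beyond the shuffling window are never postponed, while the low n bits are left to the entropy source to decide which of the oldest commands run now and which stay pending.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Sketch (assumed semantics): the low min(limit, queue_size) bits come from entropy,
// all higher bits are forced to 1, so only commands inside the shuffling window can
// be postponed; a set bit means "execute this queue slot now".
static std::uint16_t make_exec_mask(std::uint32_t limit, std::uint32_t queue_size, std::uint64_t entropy)
{
	const std::uint32_t window = std::min(limit, queue_size);
	return static_cast<std::uint16_t>((0u - (1u << window)) | entropy);
}

int main()
{
	// limit == 0 (shuffling disabled): every bit is set, all commands execute in order
	std::printf("%04x\n", make_exec_mask(0, 8, 0x1234)); // ffff

	// limit == 4: only the four oldest queue slots are subject to shuffling
	std::printf("%04x\n", make_exec_mask(4, 8, 0x1234)); // fff4
}

With the setting at 0 the mask degenerates to all ones, which matches the "Mask bits are always set" comment in the patch and preserves the previous strictly in-order MFC behavior.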