From 898637f83008bfa065b42a3d1f23c0a220ffffcd Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 25 Mar 2018 00:03:32 +0300 Subject: [PATCH] Remove mfc_thread Clear mfc_queue on reset Improve MFC Proxy a bit --- rpcs3/Emu/Cell/MFC.cpp | 322 ------------- rpcs3/Emu/Cell/MFC.h | 29 -- rpcs3/Emu/Cell/RawSPUThread.cpp | 63 ++- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 7 - rpcs3/Emu/Cell/SPUThread.cpp | 597 +++++++++++++++++-------- rpcs3/Emu/Cell/SPUThread.h | 31 +- rpcs3/Emu/Cell/lv2/sys_spu.cpp | 12 +- rpcs3/Emu/Cell/lv2/sys_spu.h | 4 +- rpcs3/Emu/System.cpp | 17 - 9 files changed, 477 insertions(+), 605 deletions(-) diff --git a/rpcs3/Emu/Cell/MFC.cpp b/rpcs3/Emu/Cell/MFC.cpp index 01ea85be3a..b50ba1a176 100644 --- a/rpcs3/Emu/Cell/MFC.cpp +++ b/rpcs3/Emu/Cell/MFC.cpp @@ -63,325 +63,3 @@ void fmt_class_string::format(std::string& out, u64 arg) return unknown; }); } - -mfc_thread::mfc_thread() - : cpu_thread(0) -{ -} - -mfc_thread::~mfc_thread() -{ -} - -std::string mfc_thread::get_name() const -{ - return "MFC Thread"; -} - -void mfc_thread::cpu_task() -{ - vm::passive_lock(*this); - - u32 no_updates = 0; - - while (!m_spus.empty() || m_spuq.size() != 0) - { - // Add or remove destroyed SPU threads - while (m_spuq.size()) - { - auto& thread_ptr = m_spuq[0]; - - // Look for deleted threads if nullptr received - for (auto it = m_spus.cbegin(); !thread_ptr && it != m_spus.cend();) - { - if (test(it->get()->state, cpu_flag::exit)) - { - it = m_spus.erase(it); - } - else - { - it++; - } - } - - // Add thread - if (thread_ptr) - { - m_spus.emplace_back(std::move(thread_ptr)); - } - - m_spuq.end_pop(); - no_updates = 0; - } - - test_state(); - - // Process SPU threads - for (const auto& thread_ptr : m_spus) - { - SPUThread& spu = *thread_ptr; - - const auto proxy_size = spu.mfc_proxy.size(); - const auto queue_size = spu.mfc_queue.size(); - - if (proxy_size) - { - const auto& cmd = spu.mfc_proxy[0]; - - spu.do_dma_transfer(cmd); - - if (cmd.cmd & MFC_START_MASK && !spu.status.test_and_set(SPU_STATUS_RUNNING)) - { - spu.run(); - } - - spu.mfc_proxy.end_pop(); - no_updates = 0; - } - - test_state(); - - if (queue_size) - { - u32 fence_mask = 0; // Using this instead of stall_mask to avoid a possible race condition - u32 barrier_mask = 0; - bool first = true; - for (u32 i = 0; i < spu.mfc_queue.size(); i++, first = false) - { - auto& cmd = spu.mfc_queue[i]; - - // this check all revolves around a potential 'stalled list' in the queue as its the one thing that can cause out of order mfc list execution currently - // a list with barrier hard blocks that tag until it's been dealt with - // and a new command that has a fence cant be executed until the stalled list has been dealt with - if ((cmd.size != 0) && ((barrier_mask & (1u << cmd.tag)) || ((cmd.cmd & MFC_FENCE_MASK) && ((1 << cmd.tag) & fence_mask)))) - continue; - - if ((cmd.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) == MFC_PUTQLLUC_CMD) - { - auto& data = vm::_ref(cmd.eal); - const auto to_write = spu._ref(cmd.lsa & 0x3ffff); - - cmd.size = 0; - no_updates = 0; - - vm::reservation_acquire(cmd.eal, 128); - - // Store unconditionally - if (s_use_rtm && utils::transaction_enter()) - { - if (!vm::reader_lock{ vm::try_to_lock }) - { - _xabort(0); - } - - data = to_write; - vm::reservation_update(cmd.eal, 128); - vm::notify(cmd.eal, 128); - _xend(); - } - else - { - vm::writer_lock lock(0); - data = to_write; - vm::reservation_update(cmd.eal, 128); - vm::notify(cmd.eal, 128); - } - } - else if (cmd.cmd & MFC_LIST_MASK && LIKELY(cmd.cmd != MFC_SYNC_CMD)) - { - struct list_element - { - be_t sb; // Stall-and-Notify bit (0x8000) - be_t ts; // List Transfer Size - be_t ea; // External Address Low - }; - - if (cmd.size && (spu.ch_stall_mask & (1u << cmd.tag)) == 0) - { - cmd.lsa &= 0x3fff0; - - // try to get the whole list done in one go - while (cmd.size != 0) - { - const list_element item = spu._ref(cmd.eal & 0x3fff8); - - const u32 size = item.ts; - const u32 addr = item.ea; - - if (size) - { - spu_mfc_cmd transfer; - transfer.eal = addr; - transfer.eah = 0; - transfer.lsa = cmd.lsa | (addr & 0xf); - transfer.tag = cmd.tag; - transfer.cmd = MFC(cmd.cmd & ~MFC_LIST_MASK); - transfer.size = size; - - spu.do_dma_transfer(transfer); - cmd.lsa += std::max(size, 16); - } - - cmd.eal += 8; - cmd.size -= 8; - no_updates = 0; - - // dont stall for last 'item' in list - if ((item.sb & 0x8000) && (cmd.size != 0)) - { - spu.ch_stall_mask |= (1 << cmd.tag); - spu.ch_stall_stat.push_or(spu, 1 << cmd.tag); - - const u32 evt = spu.ch_event_stat.fetch_or(SPU_EVENT_SN); - - if (evt & SPU_EVENT_WAITING) - { - spu.notify(); - } - break; - } - } - } - - if (cmd.size != 0 && (cmd.cmd & MFC_BARRIER_MASK)) - barrier_mask |= (1 << cmd.tag); - else if (cmd.size != 0) - fence_mask |= (1 << cmd.tag); - } - else if (UNLIKELY((cmd.cmd & ~0xc) == MFC_BARRIER_CMD)) - { - // Raw barrier commands / sync commands are tag agnostic and hard sync the mfc list - // Need to gaurentee everything ahead of it has processed before this - if (first) - cmd.size = 0; - else - break; - } - else if (LIKELY(cmd.size)) - { - spu.do_dma_transfer(cmd); - cmd.size = 0; - } - if (!cmd.size && first) - { - spu.mfc_queue.end_pop(); - no_updates = 0; - break; - } - else if (!cmd.size && i == 1) - { - // nasty hack, shoving stalled list down one - // this *works* from the idea that the only thing that could have been passed over in position 0 is a stalled list - // todo: this can still create a situation where we say the mfc queue is full when its actually not, which will cause a rough deadlock between spu and mfc - // which will causes a situation where the spu is waiting for the queue to open up but hasnt signaled the stall yet - spu.mfc_queue[1] = spu.mfc_queue[0]; - spu.mfc_queue.end_pop(); - no_updates = 0; - break; - } - } - } - - test_state(); - - if (spu.ch_tag_upd) - { - // Mask incomplete transfers - u32 completed = spu.ch_tag_mask; - { - for (u32 i = 0; i < spu.mfc_queue.size(); i++) - { - const auto& _cmd = spu.mfc_queue[i]; - if (_cmd.size) - completed &= ~(1u << _cmd.tag); - } - } - - if (completed && spu.ch_tag_upd.compare_and_swap_test(1, 0)) - { - spu.ch_tag_stat.push(spu, completed); - no_updates = 0; - } - else if (spu.ch_tag_mask == completed && spu.ch_tag_upd.compare_and_swap_test(2, 0)) - { - spu.ch_tag_stat.push(spu, completed); - no_updates = 0; - } - } - - test_state(); - } - if (no_updates++) - { - if (no_updates >= 3) - { - if (m_spuq.size()) - { - no_updates = 0; - } - - for (const auto& thread_ptr : m_spus) - { - SPUThread& spu = *thread_ptr; - - if (spu.mfc_proxy.size()) - { - no_updates = 0; - break; - } - - if (spu.mfc_queue.size()) - { - auto& cmd = spu.mfc_queue[0]; - - if ((cmd.cmd & MFC_LIST_MASK) == 0 || (spu.ch_stall_mask & (1u << cmd.tag)) == 0) - { - no_updates = 0; - break; - } - } - - if (spu.ch_tag_upd) - { - no_updates = 0; - break; - } - } - - if (no_updates) - { - vm::temporary_unlock(*this); - thread_ctrl::wait_for(100); - } - } - else - { - vm::reader_lock lock; - vm::notify_all(); - } - } - } - - vm::passive_unlock(*this); - state += cpu_flag::stop; -} - -void mfc_thread::add_spu(spu_ptr _spu) -{ - while (!m_spuq.try_push(std::move(_spu))) - { - busy_wait(); - continue; - } - - run(); -} - -void mfc_thread::on_spawn() -{ - if (g_cfg.core.thread_scheduler_enabled) - { - // Bind to same set with the SPUs - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::spu)); - } -} diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h index 48e9214227..3438bacb4a 100644 --- a/rpcs3/Emu/Cell/MFC.h +++ b/rpcs3/Emu/Cell/MFC.h @@ -1,10 +1,5 @@ #pragma once -#include "Emu/CPU/CPUThread.h" -#include "Utilities/lockless.h" - -#include - enum MFC : u8 { MFC_PUT_CMD = 0x20, MFC_PUTB_CMD = 0x21, MFC_PUTF_CMD = 0x22, @@ -92,27 +87,3 @@ struct alignas(16) spu_mfc_cmd u32 eal; u32 eah; }; - -class mfc_thread : public cpu_thread -{ - using spu_ptr = std::shared_ptr; - - // SPU threads to poll - std::vector m_spus; - - // SPU threads to enqueue - lf_mpsc m_spuq; - -public: - mfc_thread(); - - virtual ~mfc_thread() override; - - virtual std::string get_name() const override; - - virtual void cpu_task() override; - - virtual void add_spu(spu_ptr _spu); - - virtual void on_spawn() override; -}; diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index 3bf09706ee..448dc198b9 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -55,8 +55,7 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value) case MFC_QStatus_offs: { - const auto size = mfc_proxy.size(); - value = (size ? 0 : MFC_PROXY_COMMAND_QUEUE_EMPTY_FLAG) | (8 - size); + value = MFC_PROXY_COMMAND_QUEUE_EMPTY_FLAG | 8; return true; } @@ -71,7 +70,7 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value) value = (ch_out_mbox.get_count() & 0xff) | ((4 - ch_in_mbox.get_count()) << 8 & 0xff00) | (ch_out_intr_mbox.get_count() << 16 & 0xff0000); return true; } - + case SPU_Status_offs: { value = status; @@ -80,7 +79,7 @@ bool RawSPUThread::read_reg(const u32 addr, u32& value) case Prxy_TagStatus_offs: { - value = mfc_proxy.size() ? 0 : +mfc_prxy_mask; + value = mfc_prxy_mask; return true; } @@ -158,18 +157,58 @@ bool RawSPUThread::write_reg(const u32 addr, const u32 value) case MFC_Class_CMD_offs: { g_tls_mfc[index].cmd = MFC(value & 0xff); - do_dma_transfer(g_tls_mfc[index]); - g_tls_mfc[index] = {}; - g_tls_mfc[index].cmd = MFC(MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL); - if (value & MFC_START_MASK) + switch (value & 0xff) { - try_start(); + case MFC_SNDSIG_CMD: + case MFC_SNDSIGB_CMD: + case MFC_SNDSIGF_CMD: + { + g_tls_mfc[index].size = 4; + // Fallthrough + } + case MFC_PUT_CMD: + case MFC_PUTB_CMD: + case MFC_PUTF_CMD: + case MFC_PUTS_CMD: + case MFC_PUTBS_CMD: + case MFC_PUTFS_CMD: + case MFC_GET_CMD: + case MFC_GETB_CMD: + case MFC_GETF_CMD: + case MFC_GETS_CMD: + case MFC_GETBS_CMD: + case MFC_GETFS_CMD: + { + if (g_tls_mfc[index].size) + { + // Perform transfer immediately + do_dma_transfer(g_tls_mfc[index]); + } + + // .cmd should be zero, which is equal to MFC_PPU_DMA_CMD_ENQUEUE_SUCCESSFUL + g_tls_mfc[index] = {}; + + if (value & MFC_START_MASK) + { + try_start(); + } + + return true; + } + case MFC_BARRIER_CMD: + case MFC_EIEIO_CMD: + case MFC_SYNC_CMD: + { + g_tls_mfc[index] = {}; + _mm_mfence(); + return true; + } } - return true; + break; } - + case Prxy_QueryType_offs: { // TODO @@ -264,6 +303,4 @@ void spu_load_exec(const spu_exec_object& elf) spu->cpu_init(); spu->npc = elf.header.e_entry; - - fxm::get_always()->add_spu(std::move(spu)); } diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index cb47718ec7..ad673f95a2 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -1276,13 +1276,6 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->unuse(*addr); return; } - case MFC_WrTagMask: - { - c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); - c->mov(SPU_OFF_32(ch_tag_mask), *addr); - c->unuse(*addr); - return; - } case MFC_LSA: { c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 3592337c54..ddf7e6e2b5 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -310,11 +310,32 @@ std::string SPUThread::get_name() const std::string SPUThread::dump() const { - std::string&& ret = cpu_thread::dump(); - ret += fmt::format("\n" "Tag mask: 0x%08x\n" "MFC entries: %u\n", +ch_tag_mask, mfc_queue.size()); - ret += "Registers:\n=========\n"; + std::string ret = cpu_thread::dump(); - for (uint i = 0; i<128; ++i) ret += fmt::format("GPR[%d] = %s\n", i, gpr[i]); + fmt::append(ret, "\nTag Mask: 0x%08x", ch_tag_mask); + fmt::append(ret, "\nMFC Stall: 0x%08x", ch_stall_mask); + fmt::append(ret, "\nMFC Queue Size: %u", mfc_size); + + for (u32 i = 0; i < 16; i++) + { + if (i < mfc_size) + { + fmt::append(ret, "\n[%s #%02u 0x%05x:0x%08x 0x%x]", + mfc_queue[i].cmd, mfc_queue[i].tag, mfc_queue[i].lsa, + mfc_queue[i].eah * 0x100000000ull + mfc_queue[i].eal, mfc_queue[i].size); + } + else + { + fmt::append(ret, "\n[-]"); + } + } + + ret += "\nRegisters:\n========="; + + for (u32 i = 0; i < 128; i++) + { + fmt::append(ret, "\nGPR[%d] = %s", i, gpr[i]); + } return ret; } @@ -327,6 +348,7 @@ void SPUThread::cpu_init() ch_mfc_cmd = {}; srr0 = 0; + mfc_size = 0; ch_tag_upd = 0; ch_tag_mask = 0; mfc_prxy_mask = 0; @@ -501,7 +523,7 @@ void SPUThread::push_snr(u32 number, u32 value) } } -void SPUThread::do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc) +void SPUThread::do_dma_transfer(const spu_mfc_cmd& args) { const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD; @@ -537,8 +559,6 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc) } } - if (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK)) _mm_mfence(); - void* dst = vm::base(eal); void* src = vm::base(offset + lsa); @@ -661,45 +681,292 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc) } } - if (is_get && from_mfc) + if (is_get) { //_mm_sfence(); } } -void SPUThread::process_mfc_cmd() +bool SPUThread::do_dma_check(const spu_mfc_cmd& args) { - spu::scheduler::concurrent_execution_watchdog watchdog(*this); - LOG_TRACE(SPU, "DMAC: cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size); - - const auto mfc = fxm::check_unlocked(); - - // Check queue size - auto check_queue_size = [&]() + for (u32 i = 0; i < mfc_size; i++) { - while (mfc_queue.size() >= 16) + if (mfc_queue[i].cmd == MFC_BARRIER_CMD) { - if (test(state, cpu_flag::stop + cpu_flag::dbg_global_stop)) + return false; + } + + if (mfc_queue[i].tag == args.tag && mfc_queue[i].cmd != MFC_EIEIO_CMD) + { + if (args.cmd & MFC_FENCE_MASK || mfc_queue[i].cmd & MFC_BARRIER_MASK) { - return; + return false; + } + } + } + + return true; +} + +bool SPUThread::do_list_transfer(spu_mfc_cmd& args) +{ + vm::reader_lock lock; + + struct list_element + { + be_t sb; // Stall-and-Notify bit (0x8000) + be_t ts; // List Transfer Size + be_t ea; // External Address Low + } item{}; + + while (args.size) + { + if (UNLIKELY(item.sb & 0x8000)) + { + ch_stall_mask |= (1u << args.tag); + + if (!ch_stall_stat.get_count()) + { + ch_event_stat |= SPU_EVENT_SN; } - // TODO: investigate lost notifications - std::this_thread::yield(); - _mm_lfence(); + ch_stall_stat.set_value((1u << args.tag) | ch_stall_stat.get_value()); + return false; } - }; - switch (ch_mfc_cmd.cmd) + args.lsa &= 0x3fff0; + item = _ref(args.eal & 0x3fff8); + + const u32 size = item.ts; + const u32 addr = item.ea; + + LOG_TRACE(SPU, "LIST: addr=0x%x, size=0x%x, lsa=0x%05x, sb=0x%x", addr, size, args.lsa | (addr & 0xf), item.sb); + + if (size) + { + if (!vm::check_addr(addr, size, vm::page_readable | (args.cmd & MFC_PUT_CMD ? vm::page_writable : 0))) + { + Emu.Pause(); + state += cpu_flag::stop; + LOG_FATAL(SPU, "Access violation %s location 0x%x (%s, size=0x%x)", + args.cmd & MFC_PUT_CMD ? "writing" : "reading", addr, args.cmd, size); + + return false; + } + + spu_mfc_cmd transfer; + transfer.eal = addr; + transfer.eah = 0; + transfer.lsa = args.lsa | (addr & 0xf); + transfer.tag = args.tag; + transfer.cmd = MFC(args.cmd & ~MFC_LIST_MASK); + transfer.size = size; + + do_dma_transfer(transfer); + const u32 add_size = std::max(size, 16); + args.lsa += add_size; + } + + args.eal += 8; + args.size -= 8; + } + + return true; +} + +bool SPUThread::do_putlluc(const spu_mfc_cmd& args) +{ + const u32 addr = args.eal; + auto& data = vm::_ref(addr); + const auto to_write = _ref(args.lsa & 0x3ffff); + + vm::reservation_acquire(addr, 128); + + // Store unconditionally + if (s_use_rtm && utils::transaction_enter()) + { + if (!vm::reader_lock{vm::try_to_lock}) + { + _xabort(0); + } + + data = to_write; + vm::reservation_update(addr, 128); + vm::notify(addr, 128); + _xend(); + } + else + { + vm::writer_lock lock(0); + data = to_write; + vm::reservation_update(addr, 128); + vm::notify(addr, 128); + } + + return true; +} + +void SPUThread::do_mfc() +{ + u32 removed = 0; + u32 barrier = 0; + u32 fence = 0; + + // Process enqueued commands + std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args) + { + if ((args.cmd & ~0xc) == MFC_BARRIER_CMD) + { + if (&args - mfc_queue <= removed) + { + // Remove barrier-class command if it's the first in the queue + _mm_mfence(); + removed++; + return true; + } + + if (args.cmd == MFC_BARRIER_CMD) + { + // Block all tags + barrier |= 0xffffffffu; + } + + return false; + } + + // Select tag bit in the tag mask or the stall mask + const u32 mask = 1u << args.tag; + + // A list with barrier hard blocks that tag until it's been dealt with + if (barrier & mask) + { + return false; + } + + // A new command that has a fence can't be executed until the stalled list has been dealt with + if (args.cmd & MFC_FENCE_MASK && fence & mask) + { + return false; + } + + if (args.cmd & MFC_LIST_MASK) + { + if (!test(ch_stall_mask, mask) && do_list_transfer(args)) + { + removed++; + return true; + } + + fence |= mask; + + if (args.cmd & MFC_BARRIER_MASK) + { + barrier |= mask; + } + + if (test(state, cpu_flag::stop)) + { + barrier |= 0xffffffffu; + } + + return false; + } + + if (args.cmd == MFC_PUTQLLUC_CMD) + { + if (do_putlluc(args)) + { + removed++; + return true; + } + + barrier |= 0xffffffffu; + return false; + } + + if (args.size) + { + vm::reader_lock lock; + + if (!vm::check_addr(args.eal, args.size, vm::page_readable | (args.cmd & MFC_PUT_CMD ? vm::page_writable : 0))) + { + Emu.Pause(); + state += cpu_flag::stop; + LOG_FATAL(SPU, "Access violation %s location 0x%x (%s, size=0x%x)", + args.cmd & MFC_PUT_CMD ? "writing" : "reading", + args.eal, args.cmd, args.size); + + barrier |= 0xffffffffu; + return false; + } + + do_dma_transfer(args); + } + + removed++; + return true; + }); + + mfc_size -= removed; + + if (removed && ch_tag_upd) + { + const u32 completed = get_mfc_completed(); + + if (completed && ch_tag_upd == 1) + { + ch_tag_stat.set_value(completed); + ch_tag_upd = 0; + } + else if (completed == ch_tag_mask && ch_tag_upd == 2) + { + ch_tag_stat.set_value(completed); + ch_tag_upd = 0; + } + } +} + +u32 SPUThread::get_mfc_completed() +{ + u32 completed = ch_tag_mask; + + for (u32 i = 0; i < mfc_size; i++) + { + if (mfc_queue[i].cmd != MFC_BARRIER_CMD && mfc_queue[i].cmd != MFC_EIEIO_CMD) + { + completed &= ~(1u << mfc_queue[i].tag); + } + } + + return completed; +} + +bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) +{ + // Stall infinitely if MFC queue is full + while (mfc_size >= 16) + { + if (test(state, cpu_flag::stop)) + { + return false; + } + + thread_ctrl::wait(); + } + + spu::scheduler::concurrent_execution_watchdog watchdog(*this); + LOG_TRACE(SPU, "DMAC: cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x", args.cmd, args.lsa, args.eal, args.tag, args.size); + + switch (args.cmd) { case MFC_GETLLAR_CMD: { - auto& data = vm::_ref(ch_mfc_cmd.eal); + auto& data = vm::_ref(args.eal); - const u32 _addr = ch_mfc_cmd.eal; + const u32 _addr = args.eal; const u64 _time = vm::reservation_acquire(raddr, 128); - if (raddr && raddr != ch_mfc_cmd.eal) + if (raddr && raddr != args.eal) { ch_event_stat |= SPU_EVENT_LR; } @@ -741,8 +1008,9 @@ void SPUThread::process_mfc_cmd() rdata = data; _xend(); - _ref(ch_mfc_cmd.lsa & 0x3ffff) = rdata; - return ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); + _ref(args.lsa & 0x3ffff) = rdata; + ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); + return true; } else { @@ -760,20 +1028,21 @@ void SPUThread::process_mfc_cmd() } // Copy to LS - _ref(ch_mfc_cmd.lsa & 0x3ffff) = rdata; + _ref(args.lsa & 0x3ffff) = rdata; - return ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); + ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS); + return true; } case MFC_PUTLLC_CMD: { // Store conditionally - auto& data = vm::_ref(ch_mfc_cmd.eal); - const auto to_write = _ref(ch_mfc_cmd.lsa & 0x3ffff); + auto& data = vm::_ref(args.eal); + const auto to_write = _ref(args.lsa & 0x3ffff); bool result = false; - if (raddr == ch_mfc_cmd.eal && rtime == vm::reservation_acquire(raddr, 128) && rdata == data) + if (raddr == args.eal && rtime == vm::reservation_acquire(raddr, 128) && rdata == data) { // TODO: vm::check_addr if (s_use_rtm && utils::transaction_enter()) @@ -824,20 +1093,20 @@ void SPUThread::process_mfc_cmd() } raddr = 0; - return; + return true; } case MFC_PUTLLUC_CMD: { - if (raddr && ch_mfc_cmd.eal == raddr) + if (raddr && args.eal == raddr) { ch_event_stat |= SPU_EVENT_LR; raddr = 0; } - auto& data = vm::_ref(ch_mfc_cmd.eal); - const auto to_write = _ref(ch_mfc_cmd.lsa & 0x3ffff); + auto& data = vm::_ref(args.eal); + const auto to_write = _ref(args.lsa & 0x3ffff); - vm::reservation_acquire(ch_mfc_cmd.eal, 128); + vm::reservation_acquire(args.eal, 128); // Store unconditionally // TODO: vm::check_addr @@ -850,32 +1119,36 @@ void SPUThread::process_mfc_cmd() } data = to_write; - vm::reservation_update(ch_mfc_cmd.eal, 128); - vm::notify(ch_mfc_cmd.eal, 128); + vm::reservation_update(args.eal, 128); + vm::notify(args.eal, 128); _xend(); ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS); - return; + return true; } vm::writer_lock lock(0); data = to_write; - vm::reservation_update(ch_mfc_cmd.eal, 128); - vm::notify(ch_mfc_cmd.eal, 128); + vm::reservation_update(args.eal, 128); + vm::notify(args.eal, 128); ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS); - return; + return true; } case MFC_PUTQLLUC_CMD: { - ch_mfc_cmd.size = 128; - break; + if (UNLIKELY(!do_dma_check(args) || !do_putlluc(args))) + { + mfc_queue[mfc_size++] = args; + } + + return true; } case MFC_SNDSIG_CMD: case MFC_SNDSIGB_CMD: case MFC_SNDSIGF_CMD: { - ch_mfc_cmd.size = 4; + args.size = 4; // Fallthrough } case MFC_PUT_CMD: @@ -888,24 +1161,34 @@ void SPUThread::process_mfc_cmd() case MFC_GETB_CMD: case MFC_GETF_CMD: { - // Try to process small transfers immediately - if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0) + if (LIKELY(args.size <= 0x4000)) { - vm::reader_lock lock(vm::try_to_lock); - - if (!lock) + if (UNLIKELY(!do_dma_check(args))) { - break; + mfc_queue[mfc_size++] = args; + return true; } - if (!vm::check_addr(ch_mfc_cmd.eal, ch_mfc_cmd.size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0))) + if (LIKELY(args.size)) { - // TODO - break; + vm::reader_lock lock; + + if (!vm::check_addr(args.eal, args.size, vm::page_readable | (args.cmd & MFC_PUT_CMD ? vm::page_writable : 0))) + { + Emu.Pause(); + state += cpu_flag::stop; + LOG_FATAL(SPU, "Access violation %s location 0x%x (%s, size=0x%x)", + args.cmd & MFC_PUT_CMD ? "writing" : "reading", + args.eal, args.cmd, args.size); + + mfc_queue[mfc_size++] = args; + return true; + } + + do_dma_transfer(args); } - do_dma_transfer(ch_mfc_cmd, false); - return; + return true; } break; @@ -920,73 +1203,14 @@ void SPUThread::process_mfc_cmd() case MFC_GETLB_CMD: case MFC_GETLF_CMD: { - if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0) + if (LIKELY(args.size <= 0x4000)) { - vm::reader_lock lock(vm::try_to_lock); - - if (!lock) + if (UNLIKELY(!do_dma_check(args) || test(ch_stall_mask, 1u << args.tag) || !do_list_transfer(args))) { - break; + mfc_queue[mfc_size++] = args; } - struct list_element - { - be_t sb; - be_t ts; - be_t ea; - }; - - u32 total_size = 0; - - while (ch_mfc_cmd.size && total_size <= max_imm_dma_size) - { - ch_mfc_cmd.lsa &= 0x3fff0; - - const list_element item = _ref(ch_mfc_cmd.eal & 0x3fff8); - - if (item.sb & 0x8000) - { - break; - } - - const u32 size = item.ts; - const u32 addr = item.ea; - - if (size) - { - if (total_size + size > max_imm_dma_size) - { - break; - } - - if (!vm::check_addr(addr, size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0))) - { - // TODO - break; - } - - spu_mfc_cmd transfer; - transfer.eal = addr; - transfer.eah = 0; - transfer.lsa = ch_mfc_cmd.lsa | (addr & 0xf); - transfer.tag = ch_mfc_cmd.tag; - transfer.cmd = MFC(ch_mfc_cmd.cmd & ~MFC_LIST_MASK); - transfer.size = size; - - do_dma_transfer(transfer); - const u32 add_size = std::max(size, 16); - ch_mfc_cmd.lsa += add_size; - total_size += add_size; - } - - ch_mfc_cmd.eal += 8; - ch_mfc_cmd.size -= 8; - } - - if (ch_mfc_cmd.size == 0) - { - return; - } + return true; } break; @@ -995,30 +1219,25 @@ void SPUThread::process_mfc_cmd() case MFC_EIEIO_CMD: case MFC_SYNC_CMD: { - ch_mfc_cmd.size = 1; - - if (mfc_queue.size() == 0) + if (mfc_size == 0) { _mm_mfence(); - return; + } + else + { + mfc_queue[mfc_size++] = args; } - break; + return true; } default: { - fmt::throw_exception("Unknown command (cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size); + break; } } - // Enqueue - check_queue_size(); - verify(HERE), mfc_queue.try_push(ch_mfc_cmd); - - //if (test(mfc->state, cpu_flag::is_waiting)) - { - mfc->notify(); - } + fmt::throw_exception("Unknown command (cmd=%s, lsa=0x%x, ea=0x%llx, tag=0x%x, size=0x%x)" HERE, + args.cmd, args.lsa, args.eal, args.tag, args.size); } u32 SPUThread::get_events(bool waiting) @@ -1103,7 +1322,7 @@ u32 SPUThread::get_ch_count(u32 ch) case SPU_RdSigNotify2: return ch_snr2.get_count(); case MFC_RdAtomicStat: return ch_atomic_stat.get_count(); case SPU_RdEventStat: return get_events() != 0; - case MFC_Cmd: return std::max(16 - mfc_queue.size(), (u32)0); + case MFC_Cmd: return 16 - mfc_size; } fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???"); @@ -1115,15 +1334,11 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) auto read_channel = [&](spu_channel_t& channel) { - if (channel.try_pop(out)) - return true; - for (int i = 0; i < 10 && channel.get_count() == 0; i++) { busy_wait(); } - u32 ctr = 0; while (!channel.try_pop(out)) { if (test(state, cpu_flag::stop)) @@ -1131,16 +1346,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) return false; } - if (ctr > 10000) - { - ctr = 0; - std::this_thread::yield(); - } - else - { - ctr++; - thread_ctrl::wait(); - } + thread_ctrl::wait(); } return true; @@ -1183,6 +1389,14 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) case MFC_RdTagStat: { + if (ch_tag_stat.get_count()) + { + out = ch_tag_stat.get_value(); + ch_tag_stat.set_value(0, false); + return true; + } + + // Will stall infinitely return read_channel(ch_tag_stat); } @@ -1204,11 +1418,27 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) case MFC_RdAtomicStat: { + if (ch_atomic_stat.get_count()) + { + out = ch_atomic_stat.get_value(); + ch_atomic_stat.set_value(0, false); + return true; + } + + // Will stall infinitely return read_channel(ch_atomic_stat); } case MFC_RdListStallStat: { + if (ch_stall_stat.get_count()) + { + out = ch_stall_stat.get_value(); + ch_stall_stat.set_value(0, false); + return true; + } + + // Will stall infinitely return read_channel(ch_stall_stat); } @@ -1455,6 +1685,23 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) case MFC_WrTagMask: { ch_tag_mask = value; + + if (ch_tag_upd) + { + const u32 completed = get_mfc_completed(); + + if (completed && ch_tag_upd == 1) + { + ch_tag_stat.set_value(completed); + ch_tag_upd = 0; + } + else if (completed == value && ch_tag_upd == 2) + { + ch_tag_stat.set_value(completed); + ch_tag_upd = 0; + } + } + return true; } @@ -1465,42 +1712,24 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) break; } - ch_tag_stat.set_value(0, false); - ch_tag_upd = value; + const u32 completed = get_mfc_completed(); - if (ch_tag_mask == 0) + if (!value) { - // TODO - ch_tag_stat.set_value(0); + ch_tag_stat.set_value(completed); } - else if (mfc_queue.size() == 0 && (!value || ch_tag_upd.exchange(0))) + else if (completed && value == 1) { - ch_tag_stat.set_value(ch_tag_mask); + ch_tag_stat.set_value(completed); } - else if (!value) + else if (completed == ch_tag_mask && value == 2) { - u32 completed = ch_tag_mask; - - for (u32 i = 0; completed && i < 16; i++) - { - const auto& _cmd = mfc_queue.get_push(i); - - if (_cmd.size) - { - completed &= ~(1u << _cmd.tag); - } - } - ch_tag_stat.set_value(completed); } else { - auto mfc = fxm::check_unlocked(); - - //if (test(mfc->state, cpu_flag::is_waiting)) - { - mfc->notify(); - } + ch_tag_upd = value; + ch_tag_stat.set_value(0, false); } return true; @@ -1539,23 +1768,15 @@ bool SPUThread::set_ch_value(u32 ch, u32 value) case MFC_Cmd: { ch_mfc_cmd.cmd = MFC(value & 0xff); - auto cmd = ch_mfc_cmd; // save and restore previous command arguments - process_mfc_cmd(); - ch_mfc_cmd = cmd; - return true; + return process_mfc_cmd(ch_mfc_cmd); } case MFC_WrListStallAck: { // Reset stall status for specified tag - if (atomic_storage::btr(ch_stall_mask.raw(), value)) + if (::test_and_reset(ch_stall_mask, 1u << value)) { - auto mfc = fxm::check_unlocked(); - - //if (test(mfc->state, cpu_flag::is_waiting)) - { - mfc->notify(); - } + do_mfc(); } return true; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 645f7dfea0..6a22d47c20 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -417,11 +417,6 @@ enum FPSCR_EX FPSCR_DDENORM = 1 << 8, //Denormal }; -enum -{ - max_imm_dma_size = 0x4000, // Custom constant, represents the max number of bytes our mfc can transfer immediately, else enqueueing the command -}; - //Is 128 bits, but bits 0-19, 24-28, 32-49, 56-60, 64-81, 88-92, 96-115, 120-124 are unused class SPU_FPSCR { @@ -534,11 +529,10 @@ public: // MFC command data spu_mfc_cmd ch_mfc_cmd; - // MFC command queue (consumer: MFC thread) - lf_spsc mfc_queue; - - // MFC command proxy queue (consumer: MFC thread) - lf_mpsc mfc_proxy; + // MFC command queue + spu_mfc_cmd mfc_queue[16]{}; + u32 mfc_size = 0; + atomic_t mfc_prxy_mask; // Reservation Data u64 rtime = 0; @@ -546,17 +540,15 @@ public: u32 raddr = 0; u32 srr0; - atomic_t ch_tag_upd; - atomic_t ch_tag_mask; + u32 ch_tag_upd; + u32 ch_tag_mask; spu_channel_t ch_tag_stat; - atomic_t ch_stall_mask; + u32 ch_stall_mask; spu_channel_t ch_stall_stat; spu_channel_t ch_atomic_stat; spu_channel_4_t ch_in_mbox; - atomic_t mfc_prxy_mask; - spu_channel_t ch_out_mbox; spu_channel_t ch_out_intr_mbox; @@ -596,9 +588,14 @@ public: u32 recursion_level = 0; void push_snr(u32 number, u32 value); - void do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc = true); + void do_dma_transfer(const spu_mfc_cmd& args); + bool do_dma_check(const spu_mfc_cmd& args); + bool do_list_transfer(spu_mfc_cmd& args); + bool do_putlluc(const spu_mfc_cmd& args); + void do_mfc(); + u32 get_mfc_completed(); - void process_mfc_cmd(); + bool process_mfc_cmd(spu_mfc_cmd args); u32 get_events(bool waiting = false); void set_events(u32 mask); void set_interrupt_status(bool enable); diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index d277b3f5d2..436d15cd75 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -235,8 +235,6 @@ error_code sys_spu_thread_initialize(vm::ptr thread, u32 group_id, u32 spu_ auto spu = idm::make_ptr(thread_name, spu_num, group.get()); - fxm::get_always()->add_spu(spu); - *thread = spu->id; group->threads[spu_num] = std::move(spu); @@ -348,8 +346,6 @@ error_code sys_spu_thread_group_destroy(u32 id) } } - fxm::check_unlocked()->add_spu(nullptr); - return CELL_OK; } @@ -1264,11 +1260,7 @@ error_code sys_raw_spu_create(vm::ptr id, vm::ptr attr) thread->cpu_init(); - const u32 _id = thread->index; - - fxm::get_always()->add_spu(std::move(thread)); - - *id = _id; + *id = thread->index; return CELL_OK; } @@ -1330,8 +1322,6 @@ error_code sys_raw_spu_destroy(ppu_thread& ppu, u32 id) idm::remove(thread->id); - fxm::check_unlocked()->add_spu(nullptr); - return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.h b/rpcs3/Emu/Cell/lv2/sys_spu.h index 5309902710..17221bae8e 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.h +++ b/rpcs3/Emu/Cell/lv2/sys_spu.h @@ -223,6 +223,7 @@ struct lv2_spu_group static const u32 id_count = 255; const std::string name; + const u32 id; const u32 num; // SPU Number const s32 type; // SPU Thread Group Type const u32 ct; // Memory Container Id @@ -244,7 +245,8 @@ struct lv2_spu_group std::weak_ptr ep_sysmodule; // TODO: SYS_SPU_THREAD_GROUP_EVENT_SYSTEM_MODULE lv2_spu_group(std::string name, u32 num, s32 prio, s32 type, u32 ct) - : name(name) + : id(idm::last_id()) + , name(name) , num(num) , init(0) , prio(prio) diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index bb96b46cd8..55242214a1 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -948,12 +948,6 @@ bool Emulator::Pause() idm::select(on_select); idm::select(on_select); idm::select(on_select); - - if (auto mfc = fxm::check()) - { - on_select(0, *mfc); - } - return true; } @@ -1019,12 +1013,6 @@ void Emulator::Resume() idm::select(on_select); idm::select(on_select); idm::select(on_select); - - if (auto mfc = fxm::check()) - { - on_select(0, *mfc); - } - GetCallbacks().on_resume(); } @@ -1066,11 +1054,6 @@ void Emulator::Stop(bool restart) idm::select(on_select); idm::select(on_select); - if (auto mfc = fxm::check()) - { - on_select(0, *mfc); - } - LOG_NOTICE(GENERAL, "All threads signaled..."); while (g_thread_count)