diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 03e49b8aa0..6cb0686122 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -187,7 +187,7 @@ u64 PPUThread::FastCall2(u32 addr, u32 rtoc)
 	LR = Emu.m_ppu_thr_stop;
 	SetCurrentNamedThread(this);
 
-	Task();
+	CPUThread::Task();
 
 	m_status = old_status;
 	PC = old_PC;
@@ -202,4 +202,16 @@ u64 PPUThread::FastCall2(u32 addr, u32 rtoc)
 void PPUThread::FastStop()
 {
 	m_status = Stopped;
-}
\ No newline at end of file
+}
+
+void PPUThread::Task()
+{
+	if (m_custom_task)
+	{
+		m_custom_task(*this);
+	}
+	else
+	{
+		CPUThread::Task();
+	}
+}
diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h
index 0fb4e2867b..34021a2d6a 100644
--- a/rpcs3/Emu/Cell/PPUThread.h
+++ b/rpcs3/Emu/Cell/PPUThread.h
@@ -470,9 +470,6 @@ struct FPRdouble
 
 class PPUThread : public PPCThread
 {
-public:
-	u32 owned_mutexes;
-
 public:
 	PPCdouble FPR[32]; //Floating Point Register
 	FPSCRhdr FPSCR; //Floating Point Status and Control Register
@@ -556,6 +553,9 @@ public:
 	u64 R_ADDR; // reservation address
 	u64 R_VALUE; // reservation value (BE)
 
+	u32 owned_mutexes;
+	std::function<void(PPUThread& CPU)> m_custom_task;
+
 public:
 	PPUThread();
 	virtual ~PPUThread();
@@ -785,17 +785,18 @@ public:
 	virtual void InitRegs();
+	virtual void Task();
 	u64 GetStackArg(s32 i);
 	u64 FastCall2(u32 addr, u32 rtoc);
 	void FastStop();
-
-	virtual void DoReset() override;
 	virtual void DoRun() override;
+
+protected:
+	virtual void DoReset() override;
 	virtual void DoPause() override;
 	virtual void DoResume() override;
 	virtual void DoStop() override;
 
-protected:
 	virtual void Step() override
 	{
 		//if(++cycle > 20)
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index f7dc30ca73..0ff0af3c6a 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -50,7 +50,15 @@ void SPUThread::Task()
 	const int round = std::fegetround();
 	std::fesetround(FE_TOWARDZERO);
 
-	CPUThread::Task();
+	if (m_custom_task)
+	{
+		m_custom_task(*this);
+	}
+	else
+	{
+		CPUThread::Task();
+	}
+
 	if (std::fegetround() != FE_TOWARDZERO)
 	{
 		LOG_ERROR(Log::SPU, "Rounding mode has changed(%d)", std::fegetround());
@@ -68,7 +76,7 @@ void SPUThread::DoReset()
 
 void SPUThread::InitRegs()
 {
-	GPR[1]._u32[3] = 0x40000 - 120;
+	GPR[1]._u32[3] = 0x3FFF0; // initial stack frame pointer
 
 	cfg.Reset();
@@ -138,6 +146,23 @@ void SPUThread::DoClose()
 	}
 }
 
+void SPUThread::FastCall(u32 ls_addr)
+{
+	// doesn't touch thread status (instead of PPUThread::FastCall2);
+	// can't be called from another thread (because it doesn't make sense);
+	// FastStop-like routine is not defined (TODO);
+
+	auto old_PC = PC;
+	auto old_stack = GPR[1]; // only saved and restored (may be wrong)
+
+	PC = ls_addr;
+
+	CPUThread::Task();
+
+	PC = old_PC;
+	GPR[1] = old_stack;
+}
+
 void SPUThread::WriteSNR(bool number, u32 value)
 {
 	if (cfg.value & ((u64)1 << (u64)number))
@@ -272,7 +297,7 @@ void SPUThread::ListCmd(u32 lsa, u64 ea, u16 tag, u16 size, u32 cmd, MFCReg& MFC
 		auto rec = vm::ptr::make(dmac.ls_offset + list_addr + i * 8);
 
 		u32 size = rec->ts;
-		if (size < 16 && size != 1 && size != 2 && size != 4 && size != 8)
+		if (!(rec->s.ToBE() & se16(0x8000)) && size < 16 && size != 1 && size != 2 && size != 4 && size != 8)
 		{
 			LOG_ERROR(Log::SPU, "DMA List: invalid transfer size(%d)", size);
 			result = MFC_PPU_DMA_CMD_SEQUENCE_ERROR;
 		}
 
 		u32 addr = rec->ea;
-		ProcessCmd(cmd, tag, lsa | (addr & 0xf), addr, size);
-		if (Ini.HLELogging.GetValue() || rec->s)
+		if (size)
+			ProcessCmd(cmd, tag, lsa | (addr & 0xf), addr, size);
+
+		if (Ini.HLELogging.GetValue() || rec->s.ToBE())
 			LOG_NOTICE(Log::SPU, "*** list element(%d/%d): s = 0x%x, ts = 0x%x, low ea = 0x%x (lsa = 0x%x)",
 				i, list_size, (u16)rec->s, (u16)rec->ts, (u32)rec->ea, lsa | (addr & 0xf));
 
-		lsa += std::max(size, (u32)16);
+		if (size)
+			lsa += std::max(size, (u32)16);
 
 		if (rec->s.ToBE() & se16(0x8000))
 		{
@@ -454,7 +482,7 @@ void SPUThread::EnqMfcCmd(MFCReg& MFCArgs)
 			}
 			else // store unconditional
 			{
-				if (R_ADDR)
+				if (R_ADDR) // may be wrong
 				{
 					m_events |= SPU_EVENT_LR;
 				}
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index ca17de309d..eba9dd6a0c 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -503,6 +503,8 @@ public:
 	void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
 	void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
 
+	std::function<void(SPUThread& CPU)> m_custom_task;
+
 public:
 	SPUThread(CPUThreadType type = CPU_THREAD_SPU);
 	virtual ~SPUThread();
@@ -560,6 +562,7 @@ public:
 	virtual void InitRegs();
 	virtual void Task();
+	void FastCall(u32 ls_addr);
 
 protected:
 	virtual void DoReset();
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index 0039cfd3b1..6c4ef20f46 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -44,8 +44,8 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 	if (sdk_ver == -1) sdk_ver = 0x460000;
 
 	u8 _port = 0x3f;
-	u8 port_start = 0x10;
 	u64 port_mask = 0;
+
 	if (isDynamic == 0)
 	{
 		_port = *port;
@@ -53,18 +53,18 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 		{
 			return CELL_SPURS_CORE_ERROR_INVAL;
 		}
-		if (sdk_ver <= 0x17ffff && _port > 0xf)
+		if (sdk_ver > 0x17ffff && _port > 0xf)
 		{
 			return CELL_SPURS_CORE_ERROR_PERM;
 		}
-		port_start = _port;
 	}
 
-	for (u32 i = port_start + 1; i < _port; i++)
+	for (u32 i = isDynamic ? 0x10 : _port; i <= _port; i++)
 	{
-		port_mask |= 1ull << (i - 1);
+		port_mask |= 1ull << (i);
 	}
 
+	assert(port_mask); // zero mask will return CELL_EINVAL
 	if (s32 res = sys_spu_thread_group_connect_event_all_threads(spurs->m.spuTG, queue, port_mask, port))
 	{
 		if (res == CELL_EISCONN)
@@ -78,7 +78,6 @@ s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr po
 	{
 		spurs->m.spups |= be_t<u64>::make(1ull << *port); // atomic bitwise or
 	}
-
 	return CELL_OK;
 #endif
 }
@@ -141,7 +140,7 @@ s64 spursInit(
 		return CELL_SPURS_CORE_ERROR_PERM;
 	}
 
-	const bool isSecond = flags & SAF_SECOND_VERSION;
+	const bool isSecond = (flags & SAF_SECOND_VERSION) != 0;
 	memset(spurs.get_ptr(), 0, CellSpurs::size1 + isSecond * CellSpurs::size2);
 	spurs->m.revision = revision;
 	spurs->m.sdkVersion = sdkVersion;
@@ -198,10 +197,9 @@ s64 spursInit(
 	spurs->m.spuPriority = spuPriority;
 #ifdef PRX_DEBUG
 	assert(spu_image_import(spurs->m.spuImg, vm::read32(libsre_rtoc - (isSecond ? 0x7E94 : 0x7E98)), 1) == CELL_OK);
+#else
+	spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096);
 #endif
-	//char str1[0x80];
-	//memcpy(str1, prefix, prefixSize); // strcpy
-	//memcpy(str1 + prefixSize, "CellSpursKernelGroup", 21); // strcat
 
 	s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
 	if (flags & SAF_SPU_TGT_EXCLUSIVE_NON_CONTEXT)
@@ -222,10 +220,17 @@ s64 spursInit(
 	spurs->m.spuTG = tg->m_id;
 
 	name += "CellSpursKernel0";
-	for (s32 i = 0; i < nSpus; i++, name[name.size() - 1]++)
+	for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
 	{
-		auto spu = spu_thread_initialize(tg, i, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, u64(i) << 32, spurs.addr(), 0, 0);
-		spurs->m.spus[i] = spu->GetId();
+		spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& CPU)
+		{
+#ifdef PRX_DEBUG
+			CPU.GPR[3]._u32[3] = num;
+			CPU.GPR[4]._u64[1] = spurs.addr();
+			return CPU.FastCall(CPU.PC);
+#endif
+
+		})->GetId();
 	}
 
 	if (flags & SAF_SPU_PRINTF_ENABLED)
@@ -261,22 +266,24 @@ s64 spursInit(
 	name = std::string(prefix, prefixSize);
 
-	PPUThread* ppu0 = nullptr;
+	spurs->m.ppu0 = ppu_thread_create(0, 0, ppuPriority, 0x4000, true, false, name + "SpursHdlr0", [spurs](PPUThread& CPU)
+	{
 #ifdef PRX_DEBUG
-	ppu0 = ppu_thread_create(vm::read32(libsre_rtoc - 0x7E60), spurs.addr(), ppuPriority, 0x4000, true, false, name + "SpursHdlr0");
+		return cb_call<void, vm::ptr<CellSpurs>>(CPU, libsre + 0x9214, libsre_rtoc, spurs);
 #endif
-	assert(ppu0);
-	spurs->m.ppu0 = ppu0->GetId();
-	PPUThread* ppu1 = nullptr;
+	})->GetId();
+
+	spurs->m.ppu1 = ppu_thread_create(0, 0, ppuPriority, 0x8000, true, false, name + "SpursHdlr1", [spurs](PPUThread& CPU)
+	{
 #ifdef PRX_DEBUG
-	ppu1 = ppu_thread_create(vm::read32(libsre_rtoc - 0x7E24), spurs.addr(), ppuPriority, 0x8000, true, false, name + "SpursHdlr1");
+		return cb_call<void, vm::ptr<CellSpurs>>(CPU, libsre + 0xB40C, libsre_rtoc, spurs);
 #endif
-	assert(ppu1);
-	spurs->m.ppu1 = ppu1->GetId();
+
+	})->GetId();
 
 	// enable exception event handler
-	if (spurs->m.enableEH.compare_and_swap(be_t<u32>::make(0), be_t<u32>::make(1)).ToBE() == 0)
+	if (spurs->m.enableEH.compare_and_swap_test(be_t<u32>::make(0), be_t<u32>::make(1)))
 	{
 		assert(sys_spu_thread_group_connect_event(spurs->m.spuTG, spurs->m.queue, SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION) == CELL_OK);
 	}
@@ -291,12 +298,10 @@ s64 spursInit(
 
 	if (flags & SAF_SYSTEM_WORKLOAD_ENABLED) // initialize system workload
 	{
-		s32 res;
+		s32 res = CELL_OK;
 #ifdef PRX_DEBUG
 		res = cb_call<s32, vm::ptr<CellSpurs>, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x10428, libsre_rtoc,
 			spurs, Memory.RealToVirtualAddr(swlPriority), swlMaxSpu, swlIsPreem);
-#else
-		res = -1;
 #endif
 		assert(res == CELL_OK);
 	}
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
index 84e1bf877f..8f06a48660 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
@@ -367,6 +367,7 @@ s32 cellSyncRwmTryRead(vm::ptr rwm, vm::ptr buffer)
 	{
 		return res;
 	}
+
 	memcpy(buffer.get_ptr(), rwm->m_buffer.get_ptr(), (u32)rwm->m_size);
 
 	return rwm->data.atomic_op(CELL_OK, syncRwmReadEndOp);
@@ -520,7 +521,8 @@ s32 cellSyncQueuePush(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -559,7 +561,8 @@ s32 cellSyncQueueTryPush(vm::ptr queue, vm::ptr buffe
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -610,7 +613,8 @@ s32 cellSyncQueuePop(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -649,7 +653,8 @@ s32 cellSyncQueueTryPop(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -694,7 +699,8 @@ s32 cellSyncQueuePeek(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	while (queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -730,7 +736,8 @@ s32 cellSyncQueueTryPeek(vm::ptr queue, vm::ptr buffer)
 
 	const u32 size = (u32)queue->m_size;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	u32 position;
 	if (s32 res = queue->data.atomic_op(CELL_OK, [depth, &position](CellSyncQueue::data_t& queue) -> s32
@@ -759,9 +766,10 @@ s32 cellSyncQueueSize(vm::ptr queue)
 		return CELL_SYNC_ERROR_ALIGN;
 	}
 
-	const u32 count = (u32)queue->data.read_relaxed().m_v2 & 0xffffff;
+	const auto data = queue->data.read_relaxed();
+	const u32 count = (u32)data.m_v2 & 0xffffff;
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && count <= depth);
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && count <= depth);
 
 	return count;
 }
@@ -780,7 +788,8 @@ s32 cellSyncQueueClear(vm::ptr queue)
 	}
 
 	const u32 depth = (u32)queue->m_depth;
-	assert(((u32)queue->data.read_relaxed().m_v1 & 0xffffff) <= depth && ((u32)queue->data.read_relaxed().m_v2 & 0xffffff) <= depth);
+	const auto data = queue->data.read_relaxed();
+	assert(((u32)data.m_v1 & 0xffffff) <= depth && ((u32)data.m_v2 & 0xffffff) <= depth);
 
 	// TODO: optimize if possible
 	while (queue->data.atomic_op(CELL_OK, [depth](CellSyncQueue::data_t& queue) -> s32
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
index 3e504cffc9..0c33f0e25d 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp
@@ -147,7 +147,7 @@ s32 sys_ppu_thread_restart(u64 thread_id)
 	return CELL_OK;
 }
 
-PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name)
+PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name, std::function<void(PPUThread& CPU)> task)
 {
 	PPUThread& new_thread = *(PPUThread*)&Emu.GetCPU().AddThread(CPU_THREAD_PPU);
 
@@ -159,6 +159,7 @@ PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool i
 	new_thread.m_has_interrupt = false;
 	new_thread.m_is_interrupt = is_interrupt;
 	new_thread.SetName(name);
+	new_thread.m_custom_task = task;
 
 	sys_ppu_thread.Notice("*** New PPU Thread [%s] (%s, entry=0x%x): id = %d", name.c_str(), is_interrupt ? "interrupt" :
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
index b5d8540d32..c76c49b461 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.h
@@ -15,7 +15,7 @@ enum ppu_thread_flags : u64
 };
 
 // Aux
-PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name);
+PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool is_joinable, bool is_interrupt, const std::string& name, std::function<void(PPUThread& CPU)> task = nullptr);
 
 // SysCalls
 void sys_ppu_thread_exit(PPUThread& CPU, u64 errorcode);
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
index 26909c9408..2f790c5ecf 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
@@ -59,7 +59,7 @@ s32 sys_spu_image_open(vm::ptr img, vm::ptr path)
 	return CELL_OK;
 }
 
-SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4)
+SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4, std::function<void(SPUThread& CPU)> task)
 {
 	if (option)
 	{
@@ -77,6 +77,7 @@ SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image
 	new_thread.SetOffset(spu_offset);
 	new_thread.SetEntry(spu_ep);
 	new_thread.SetName(name);
+	new_thread.m_custom_task = task;
 	new_thread.Run();
 	new_thread.GPR[3] = u128::from64(0, a1);
 	new_thread.GPR[4] = u128::from64(0, a2);
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
index 526334fe36..203138e8c8 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
@@ -158,7 +158,7 @@ class SPUThread;
 // Aux
 s32 spu_image_import(sys_spu_image& img, u32 src, u32 type);
 SpuGroupInfo* spu_thread_group_create(const std::string& name, u32 num, s32 prio, s32 type, u32 container);
-SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4);
+SPUThread* spu_thread_initialize(SpuGroupInfo* group, u32 spu_num, sys_spu_image& img, const std::string& name, u32 option, u64 a1, u64 a2, u64 a3, u64 a4, std::function<void(SPUThread& CPU)> task = nullptr);
 
 // SysCalls
 s32 sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu);
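Usage sketch (illustrative, not part of the patch): the new task argument is stored in m_custom_task, and PPUThread::Task() / SPUThread::Task() run it in place of the interpreter whenever it is set. The helper below mirrors the SpursHdlr0 creation in spursInit(); the function name spursCreateHandlerThread and the HLE body are hypothetical, only the call pattern comes from the diff above.

// Hypothetical sketch of the new ppu_thread_create overload (assumes the same
// headers as cellSpurs.cpp); only the creation pattern is taken from the diff.
u32 spursCreateHandlerThread(vm::ptr<CellSpurs> spurs, s32 ppuPriority, const std::string& name)
{
	// entry = 0: no guest entry point is dispatched, the lambda becomes the thread body
	return ppu_thread_create(0, 0, ppuPriority, 0x4000, true, false, name + "SpursHdlr0", [spurs](PPUThread& CPU)
	{
		// executed by PPUThread::Task() via m_custom_task instead of CPUThread::Task();
		// an HLE implementation of the SPURS handler loop would go here
	})->GetId();
}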