diff --git a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp index be89e251d6..b4a1a6cf86 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpursSpu.cpp @@ -133,12 +133,8 @@ bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) { - u32 rv; - - spu.get_ch_value(MFC_RdAtomicStat, rv); - auto success = rv ? true : false; - success = cmd == MFC_PUTLLC_CMD ? !success : success; - return success; + const u32 rv = spu.get_ch_value(MFC_RdAtomicStat); + return cmd == MFC_PUTLLC_CMD ? !rv : true; } return true; @@ -149,7 +145,7 @@ u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask) { spu.set_ch_value(MFC_WrTagMask, tagMask); spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_IMMEDIATE); - u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv; + return spu.get_ch_value(MFC_RdTagStat); } // Wait for DMA operations to complete @@ -157,7 +153,7 @@ u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll) { spu.set_ch_value(MFC_WrTagMask, tagMask); spu.set_ch_value(MFC_WrTagUpdate, waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY); - u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv; + return spu.get_ch_value(MFC_RdTagStat); } // Halt the SPU @@ -168,16 +164,15 @@ void spursHalt(SPUThread& spu) void sys_spu_thread_exit(SPUThread& spu, s32 status) { - u32 _v; // Cancel any pending status update requests spu.set_ch_value(MFC_WrTagUpdate, 0); while (spu.get_ch_count(MFC_RdTagStat) != 1); - spu.get_ch_value(MFC_RdTagStat, _v); + spu.get_ch_value(MFC_RdTagStat); // Wait for all pending DMA operations to complete spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF); spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL); - spu.get_ch_value(MFC_RdTagStat, _v); + spu.get_ch_value(MFC_RdTagStat); spu.set_ch_value(SPU_WrOutMbox, status); spu.stop_and_signal(0x102); @@ -185,16 +180,15 @@ void sys_spu_thread_exit(SPUThread& spu, s32 status) void sys_spu_thread_group_exit(SPUThread& spu, s32 status) { - u32 _v; // Cancel any pending status update requests spu.set_ch_value(MFC_WrTagUpdate, 0); while (spu.get_ch_count(MFC_RdTagStat) != 1); - spu.get_ch_value(MFC_RdTagStat, _v); + spu.get_ch_value(MFC_RdTagStat); // Wait for all pending DMA operations to complete spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF); spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL); - spu.get_ch_value(MFC_RdTagStat, _v); + spu.get_ch_value(MFC_RdTagStat); spu.set_ch_value(SPU_WrOutMbox, status); spu.stop_and_signal(0x101); @@ -214,9 +208,7 @@ s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1) spu.set_ch_value(SPU_WrOutMbox, data1); spu.set_ch_value(SPU_WrOutIntrMbox, (spup << 24) | (data0 & 0x00FFFFFF)); - - spu.get_ch_value(SPU_RdInMbox, data0); - return data0; + return static_cast(spu.get_ch_value(SPU_RdInMbox)); } s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status) @@ -231,18 +223,18 @@ s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status) // Cancel any pending status update requests spu.set_ch_value(MFC_WrTagUpdate, 0); while (spu.get_ch_count(MFC_RdTagStat) != 1); - spu.get_ch_value(MFC_RdTagStat, result); + spu.get_ch_value(MFC_RdTagStat); // Wait for all pending DMA operations to complete spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF); spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL); - spu.get_ch_value(MFC_RdTagStat, result); + spu.get_ch_value(MFC_RdTagStat); do { spu.set_ch_value(SPU_WrOutMbox, status); spu.stop_and_signal(0x120); - spu.get_ch_value(SPU_RdInMbox, result); + result = spu.get_ch_value(SPU_RdInMbox); } while (result == CELL_EBUSY); @@ -1719,9 +1711,8 @@ s32 spursTasketSaveTaskContext(SPUThread& spu) v128 r; spu.fpscr.Read(r); ctxt->savedContextFpscr = r; - u32 r32; - spu.get_ch_value(SPU_RdEventMask, r32); ctxt->savedSpuWriteEventMask = r32; - spu.get_ch_value(MFC_RdTagMask, r32); ctxt->savedWriteTagGroupQueryMask = r32; + ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask); + ctxt->savedWriteTagGroupQueryMask = spu.get_ch_value(MFC_RdTagMask); // Store the processor context const u32 contextSaveStorage = vm::cast(taskInfo->context_save_storage_and_alloc_ls_blocks & -0x80, HERE); diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 9cb4d3121a..0be1b2f12b 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -152,7 +152,7 @@ spu_function_t spu_recompiler::compile(const std::vector& func) vec[i] = vec_vars[i]; } - Label label_stop = c->newLabel(); + label_stop = c->newLabel(); Label label_diff = c->newLabel(); Label label_code = c->newLabel(); std::vector words; @@ -163,6 +163,15 @@ spu_function_t spu_recompiler::compile(const std::vector& func) const u32 start = m_pos; const u32 end = m_pos + (func.size() - 1) * 4; + // Create instruction labels (TODO: some of them are unnecessary) + for (u32 i = 1; i < func.size(); i++) + { + if (func[i]) + { + instr_labels[i * 4 - 4 + m_pos] = c->newLabel(); + } + } + // Set PC and check status c->mov(SPU_OFF_32(pc), m_pos); c->cmp(SPU_OFF_32(state), 0); @@ -713,6 +722,14 @@ spu_function_t spu_recompiler::compile(const std::vector& func) // Update position m_pos = pos; + // Bind instruction label if necessary + const auto found = instr_labels.find(pos); + + if (found != instr_labels.end()) + { + c->bind(found->second); + } + // Execute recompiler function (this->*s_spu_decoder.decode(op))({op}); @@ -750,6 +767,27 @@ spu_function_t spu_recompiler::compile(const std::vector& func) work(); } + // Build instruction dispatch table + if (instr_table.isValid()) + { + c->align(kAlignData, 8); + c->bind(instr_table); + + for (u32 addr = start; addr < end; addr += 4) + { + const auto found = instr_labels.find(addr); + + if (found != instr_labels.end()) + { + c->embedLabel(found->second); + } + else + { + c->embedLabel(label_stop); + } + } + } + c->align(kAlignData, words_align); c->bind(label_code); for (u32 d : words) @@ -760,6 +798,9 @@ spu_function_t spu_recompiler::compile(const std::vector& func) work(); } + label_stop.reset(); + instr_table.reset(); + instr_labels.clear(); xmm_consts.clear(); // Compile and get function address @@ -1066,6 +1107,28 @@ void spu_recompiler::branch_fixed(u32 target) { using namespace asmjit; + // Check local branch + const auto local = instr_labels.find(target); + + if (local != instr_labels.end() && local->second.isValid()) + { + c->cmp(SPU_OFF_32(state), 0); + c->jz(local->second); + c->mov(SPU_OFF_32(pc), target); + c->ret(); + return; + } + + if (g_cfg.core.spu_block_size == spu_block_size_type::giga) + { + // Don't generate patch points in this mode + c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2)); + c->mov(SPU_OFF_32(pc), target); + c->xor_(qw0->r32(), qw0->r32()); + c->jmp(x86::rax); + return; + } + // Set patch address as a third argument and fallback to it Label patch_point = c->newLabel(); c->lea(*qw0, x86::qword_ptr(patch_point)); @@ -1109,9 +1172,23 @@ void spu_recompiler::branch_indirect(spu_opcode_t op) { using namespace asmjit; - // Load indirect jump address - c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); + if (!instr_table.isValid()) + { + // Request instruction table + instr_table = c->newLabel(); + } + + const u32 start = instr_labels.begin()->first; + const u32 end = instr_labels.rbegin()->first + 4; + + // Load indirect jump address, choose between local and external + c->lea(x86::r10, x86::qword_ptr(instr_table)); + c->lea(*qw1, x86::qword_ptr(*addr, 0 - start)); c->xor_(qw0->r32(), qw0->r32()); + c->cmp(qw1->r32(), end - start); + c->cmovae(qw1->r32(), qw0->r32()); + c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0)); + c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher))); if (op.d) { @@ -1119,22 +1196,34 @@ void spu_recompiler::branch_indirect(spu_opcode_t op) } else if (op.e) { - c->lock().bts(SPU_OFF_8(interrupts_enabled), 0); - c->mov(x86::r9d, SPU_OFF_32(ch_event_stat)); - c->and_(x86::r9d, SPU_OFF_32(ch_event_mask)); - c->and_(x86::r9d, SPU_EVENT_INTR_TEST); - c->cmp(x86::r9d, 0); + Label no_intr = c->newLabel(); + Label intr = c->newLabel(); + Label fail = c->newLabel(); - Label noInterrupt = c->newLabel(); - c->je(noInterrupt); + c->lock().bts(SPU_OFF_8(interrupts_enabled), 0); + c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask)); + c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED); + c->jnz(fail); + c->and_(qw1->r32(), SPU_OFF_32(ch_event_stat)); + c->test(qw1->r32(), SPU_EVENT_INTR_IMPLEMENTED); + c->jnz(intr); + c->jmp(no_intr); + c->bind(fail); + c->mov(SPU_OFF_32(pc), *addr); + c->mov(addr->r64(), reinterpret_cast(vm::base(0xffdead00))); + c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32); + c->bind(intr); c->lock().btr(SPU_OFF_8(interrupts_enabled), 0); c->mov(SPU_OFF_32(srr0), *addr); - branch_fixed(0); + c->mov(*addr, qw0->r32()); + c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher))); c->align(kAlignCode, 16); - c->bind(noInterrupt); + c->bind(no_intr); } c->mov(SPU_OFF_32(pc), *addr); + c->cmp(SPU_OFF_32(state), 0); + c->jnz(label_stop); c->jmp(x86::r10); } @@ -1348,25 +1437,33 @@ void spu_recompiler::MFSPR(spu_opcode_t op) c->movdqa(SPU_OFF_128(gpr, op.rt), vr); } +static void spu_rdch_ret(SPUThread& spu, void*, u32) +{ + // MSVC workaround (TCO) +} + +static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32)) +{ + const s64 result = _spu->get_ch_value(ch); + + if (result < 0) + { + _ret = &spu_rdch_ret; + } + + // Return channel value in the third argument + _ret(*_spu, _spu->_ptr(0), static_cast(result)); +} + void spu_recompiler::RDCH(spu_opcode_t op) { using namespace asmjit; - auto gate = [](SPUThread* _spu, u32 ch, v128* out) - { - u32 value; - - if (_spu->get_ch_value(ch, value)) - { - *out = v128::from32r(value); - _spu->pc += 4; - } - }; - auto read_channel = [&](X86Mem channel_ptr, bool sync = true) { Label wait = c->newLabel(); Label again = c->newLabel(); + Label ret = c->newLabel(); c->mov(addr->r64(), channel_ptr); c->xor_(qw0->r32(), qw0->r32()); c->align(kAlignCode, 16); @@ -1376,12 +1473,11 @@ void spu_recompiler::RDCH(spu_opcode_t op) after.emplace_back([=, pos = m_pos] { - // Do not continue after waiting c->bind(wait); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->lea(*qw0, SPU_OFF_128(gpr, op.rt)); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw0, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_rdch)); }); if (sync) @@ -1396,10 +1492,11 @@ void spu_recompiler::RDCH(spu_opcode_t op) c->mov(channel_ptr, *qw0); } - const XmmLink& vr = XmmAlloc(); - c->movd(vr, *addr); - c->pslldq(vr, 12); - c->movdqa(SPU_OFF_128(gpr, op.rt), vr); + c->mov(qw0->r32(), *addr); + c->bind(ret); + c->movd(x86::xmm0, qw0->r32()); + c->pslldq(x86::xmm0, 12); + c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); }; switch (op.ra) @@ -1415,42 +1512,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) case SPU_RdInMbox: { // TODO - Label wait = c->newLabel(); - Label next = c->newLabel(); - c->mov(SPU_OFF_32(pc), m_pos); - c->cmp(x86::byte_ptr(*cpu, offset32(&SPUThread::ch_in_mbox) + 1), 0); - c->jz(wait); - - after.emplace_back([=] - { - // Do not continue after waiting - c->bind(wait); - c->mov(*ls, op.ra); - c->lea(*qw0, SPU_OFF_128(gpr, op.rt)); - c->jmp(imm_ptr(gate)); - }); - - auto sub = [](SPUThread* _spu, v128* out, spu_function_t _ret) - { - // Workaround for gcc (TCO) - static thread_local u32 value; - - if (!_spu->get_ch_value(SPU_RdInMbox, value)) - { - // Workaround for MSVC (TCO) - fmt::raw_error("spu_recompiler::RDCH(): unexpected SPUThread::get_ch_value(SPU_RdInMbox) call"); - } - - *out = v128::from32r(value); - _ret(*_spu, _spu->_ptr(0), nullptr); - }; - - c->lea(*ls, SPU_OFF_128(gpr, op.rt)); - c->lea(*qw0, x86::qword_ptr(next)); - c->jmp(imm_ptr(sub)); - c->align(kAlignCode, 16); - c->bind(next); - return; + break; } case MFC_RdTagStat: { @@ -1489,7 +1551,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) { LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos); - auto gate1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) + auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) { const u32 out = _spu->ch_dec_value - static_cast(get_timebased_time() - _spu->ch_dec_start_timestamp); @@ -1500,7 +1562,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) _ret(*_spu, _spu->_ptr(0), nullptr); }; - auto gate2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) + auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret) { const u32 out = _spu->ch_dec_value - static_cast(get_timebased_time() - _spu->ch_dec_start_timestamp); @@ -1514,7 +1576,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) c->mov(SPU_OFF_32(pc), m_pos); c->lea(*ls, SPU_OFF_128(gpr, op.rt)); c->lea(*qw0, asmjit::x86::qword_ptr(next)); - c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr(gate1) : asmjit::imm_ptr(gate2)); + c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr(sub1) : asmjit::imm_ptr(sub2)); c->align(asmjit::kAlignCode, 16); c->bind(next); return; @@ -1532,22 +1594,23 @@ void spu_recompiler::RDCH(spu_opcode_t op) LOG_WARNING(SPU, "[0x%x] RDCH: RdEventStat", m_pos); get_events(); Label wait = c->newLabel(); + Label ret = c->newLabel(); c->jz(wait); after.emplace_back([=, pos = m_pos] { - // Do not continue after waiting c->bind(wait); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->lea(*qw0, SPU_OFF_128(gpr, op.rt)); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw0, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_rdch)); }); - const XmmLink& vr = XmmAlloc(); - c->movd(vr, *addr); - c->pslldq(vr, 12); - c->movdqa(SPU_OFF_128(gpr, op.rt), vr); + c->mov(qw0->r32(), *addr); + c->bind(ret); + c->movd(x86::xmm0, qw0->r32()); + c->pslldq(x86::xmm0, 12); + c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); return; } case SPU_RdMachStat: @@ -1561,23 +1624,28 @@ void spu_recompiler::RDCH(spu_opcode_t op) } } + Label ret = c->newLabel(); c->mov(SPU_OFF_32(pc), m_pos); - c->mov(*ls, op.ra); - c->lea(*qw0, SPU_OFF_128(gpr, op.rt)); - c->jmp(imm_ptr(gate)); - m_pos = -1; + c->mov(ls->r32(), op.ra); + c->lea(*qw0, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_rdch)); + c->bind(ret); + c->movd(x86::xmm0, qw0->r32()); + c->pslldq(x86::xmm0, 12); + c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); +} + +static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res)) +{ + // Put result into the third argument + const u32 res = _spu->get_ch_count(ch); + _ret(*_spu, _spu->_ptr(0), res); } void spu_recompiler::RCHCNT(spu_opcode_t op) { using namespace asmjit; - auto gate = [](SPUThread* _spu, u32 ch, v128* out) - { - *out = v128::from32r(_spu->get_ch_count(ch)); - _spu->pc += 4; - }; - auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false) { // Load channel count @@ -1638,21 +1706,26 @@ void spu_recompiler::RCHCNT(spu_opcode_t op) { LOG_WARNING(SPU, "[0x%x] RCHCNT: RdEventStat", m_pos); get_events(); - c->setnz(addr->r8()); - c->movzx(*addr, addr->r8()); - c->movd(x86::xmm0, *addr); - c->pslldq(x86::xmm0, 12); - c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); - return; + c->setnz(qw0->r8()); + c->movzx(qw0->r32(), qw0->r8()); + break; + } + default: + { + Label ret = c->newLabel(); + c->mov(SPU_OFF_32(pc), m_pos); + c->mov(*ls, op.ra); + c->lea(*qw0, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_rchcnt)); + c->bind(ret); + break; } } - // Non-returnable fallback for unsupported events - c->mov(SPU_OFF_32(pc), m_pos); - c->mov(*ls, op.ra); - c->lea(*qw0, SPU_OFF_128(gpr, op.rt)); - c->jmp(imm_ptr(gate)); - m_pos = -1; + // Use result from the third argument + c->movd(x86::xmm0, qw0->r32()); + c->pslldq(x86::xmm0, 12); + c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0); } void spu_recompiler::SF(spu_opcode_t op) @@ -2310,18 +2383,35 @@ void spu_recompiler::MTSPR(spu_opcode_t op) // Check SPUInterpreter for notes. } +static void spu_wrch_ret(SPUThread& _spu, void*, u8*) +{ + // MSVC workaround (TCO) +} + +static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret) +{ + if (!_spu->set_ch_value(ch, value)) + { + _ret = &spu_wrch_ret; + } + + _ret(*_spu, _spu->_ptr(0), nullptr); +} + +static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret) +{ + if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd)) + { + _ret = &spu_wrch_ret; + } + + _ret(*_spu, _spu->_ptr(0), nullptr); +} + void spu_recompiler::WRCH(spu_opcode_t op) { using namespace asmjit; - auto gate = [](SPUThread* _spu, u32 ch, u32 value) - { - if (_spu->set_ch_value(ch, value)) - { - _spu->pc += 4; - } - }; - switch (op.ra) { case SPU_WrSRR0: @@ -2332,42 +2422,14 @@ void spu_recompiler::WRCH(spu_opcode_t op) } case SPU_WrOutIntrMbox: { - auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value) - { - if (!_spu->set_ch_value(SPU_WrOutIntrMbox, value)) - { - fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(SPU_WrOutIntrMbox) call"); - } - - // Continue - _ret(*_spu, _spu->_ptr(0), nullptr); - }; - - Label ret = c->newLabel(); - Label wait = c->newLabel(); - c->mov(SPU_OFF_32(pc), m_pos); - c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); - c->bt(SPU_OFF_64(ch_out_intr_mbox), spu_channel::off_count); - c->jc(wait); - - after.emplace_back([=] - { - // Do not continue after waiting - c->bind(wait); - c->mov(*ls, op.ra); - c->jmp(imm_ptr(gate)); - }); - - c->lea(*ls, x86::qword_ptr(ret)); - c->jmp(imm_ptr(sub)); - c->align(kAlignCode, 16); - c->bind(ret); - return; + // Can't seemingly be optimized + break; } case SPU_WrOutMbox: { Label wait = c->newLabel(); Label again = c->newLabel(); + Label ret = c->newLabel(); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox)); c->align(kAlignCode, 16); @@ -2378,16 +2440,17 @@ void spu_recompiler::WRCH(spu_opcode_t op) after.emplace_back([=, pos = m_pos] { - // Do not continue after waiting c->bind(wait); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); }); c->bts(*qw0, spu_channel::off_count); c->lock().cmpxchg(SPU_OFF_64(ch_out_mbox), *qw0); c->jnz(again); + c->bind(ret); return; } case MFC_WrTagMask: @@ -2401,21 +2464,11 @@ void spu_recompiler::WRCH(spu_opcode_t op) after.emplace_back([=, pos = m_pos] { - auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value) - { - if (!_spu->set_ch_value(MFC_WrTagMask, value)) - { - fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(MFC_WrTagMask) call"); - } - - // Continue - _ret(*_spu, _spu->_ptr(0), nullptr); - }; - c->bind(upd); c->mov(SPU_OFF_32(pc), pos); - c->lea(*ls, x86::qword_ptr(ret)); - c->jmp(imm_ptr(sub)); + c->lea(ls->r32(), MFC_WrTagMask); + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); }); c->bind(ret); @@ -2434,8 +2487,9 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(fail); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); c->bind(zero); c->mov(SPU_OFF_32(ch_tag_upd), qw0->r32()); @@ -2496,22 +2550,12 @@ void spu_recompiler::WRCH(spu_opcode_t op) case MFC_Cmd: { // TODO - auto sub = [](SPUThread* _spu, spu_function_t _ret) - { - if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd)) - { - throw cpu_flag::ret; - } - - _ret(*_spu, _spu->_ptr(0), nullptr); - }; - Label ret = c->newLabel(); c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(SPU_OFF_8(ch_mfc_cmd, &spu_mfc_cmd::cmd), addr->r8()); c->mov(SPU_OFF_32(pc), m_pos); c->lea(*ls, x86::qword_ptr(ret)); - c->jmp(imm_ptr(sub)); + c->jmp(imm_ptr(spu_wrch_mfc)); c->align(kAlignCode, 16); c->bind(ret); return; @@ -2555,6 +2599,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) case SPU_WrEventMask: { Label fail = c->newLabel(); + Label ret = c->newLabel(); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(*addr, ~SPU_EVENT_IMPLEMENTED); c->mov(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED); @@ -2567,16 +2612,19 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(fail); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); }); c->mov(SPU_OFF_32(ch_event_mask), qw0->r32()); + c->bind(ret); return; } case SPU_WrEventAck: { Label fail = c->newLabel(); + Label ret = c->newLabel(); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->test(qw0->r32(), ~SPU_EVENT_IMPLEMENTED); c->jnz(fail); @@ -2585,8 +2633,9 @@ void spu_recompiler::WRCH(spu_opcode_t op) { c->bind(fail); c->mov(SPU_OFF_32(pc), pos); - c->mov(*ls, op.ra); - c->jmp(imm_ptr(gate)); + c->mov(ls->r32(), op.ra); + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); }); c->not_(qw0->r32()); @@ -2599,11 +2648,13 @@ void spu_recompiler::WRCH(spu_opcode_t op) } } + Label ret = c->newLabel(); c->mov(SPU_OFF_32(pc), m_pos); - c->mov(*ls, op.ra); + c->mov(ls->r32(), op.ra); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); - c->jmp(imm_ptr(gate)); - m_pos = -1; + c->lea(*qw1, x86::qword_ptr(ret)); + c->jmp(imm_ptr(spu_wrch)); + c->bind(ret); } void spu_recompiler::BIZ(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index eb3de416fa..84fecb4a8a 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -59,6 +59,15 @@ private: std::vector> after; std::vector> consts; + // Function return label + asmjit::Label label_stop; + + // Indirect branch dispatch table + asmjit::Label instr_table; + + // All valid instruction labels + std::map instr_labels; + // All emitted 128-bit consts std::map, asmjit::Label> xmm_consts; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index ebe73b837c..ac56da16cc 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -94,14 +94,14 @@ bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op) bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op) { - u32 result; + const s64 result = spu.get_ch_value(op.ra); - if (!spu.get_ch_value(op.ra, result)) + if (result < 0) { return false; } - spu.gpr[op.rt] = v128::from32r(result); + spu.gpr[op.rt] = v128::from32r(static_cast(result)); return true; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 2755f864fd..8909c41409 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -119,6 +119,22 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](spu_block_size_type type) + { + switch (type) + { + case spu_block_size_type::safe: return "Safe"; + case spu_block_size_type::mega: return "Mega"; + case spu_block_size_type::giga: return "Giga"; + } + + return unknown; + }); +} + namespace spu { namespace scheduler @@ -1497,11 +1513,11 @@ u32 SPUThread::get_ch_count(u32 ch) fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???"); } -bool SPUThread::get_ch_value(u32 ch, u32& out) +s64 SPUThread::get_ch_value(u32 ch) { LOG_TRACE(SPU, "get_ch_value(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???"); - auto read_channel = [&](spu_channel& channel) + auto read_channel = [&](spu_channel& channel) -> s64 { for (int i = 0; i < 10 && channel.get_count() == 0; i++) { @@ -1515,25 +1531,26 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) } } + u32 out; + while (!channel.try_pop(out)) { if (test(state, cpu_flag::stop)) { - return false; + return -1; } thread_ctrl::wait(); } - return true; + return out; }; switch (ch) { case SPU_RdSRR0: { - out = srr0; - return true; + return srr0; } case SPU_RdInMbox: { @@ -1551,6 +1568,8 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) } } + u32 out; + if (const uint old_count = ch_in_mbox.try_pop(out)) { if (old_count == 4 /* SPU_IN_MBOX_THRESHOLD */) // TODO: check this @@ -1558,12 +1577,12 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT); } - return true; + return out; } if (test(state & cpu_flag::stop)) { - return false; + return -1; } thread_ctrl::wait(); @@ -1579,9 +1598,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) if (ch_tag_stat.get_count()) { - out = ch_tag_stat.get_value(); + u32 out = ch_tag_stat.get_value(); ch_tag_stat.set_value(0, false); - return true; + return out; } // Will stall infinitely @@ -1590,8 +1609,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) case MFC_RdTagMask: { - out = ch_tag_mask; - return true; + return ch_tag_mask; } case SPU_RdSigNotify1: @@ -1608,9 +1626,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) { if (ch_atomic_stat.get_count()) { - out = ch_atomic_stat.get_value(); + u32 out = ch_atomic_stat.get_value(); ch_atomic_stat.set_value(0, false); - return true; + return out; } // Will stall infinitely @@ -1621,9 +1639,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) { if (ch_stall_stat.get_count()) { - out = ch_stall_stat.get_value(); + u32 out = ch_stall_stat.get_value(); ch_stall_stat.set_value(0, false); - return true; + return out; } // Will stall infinitely @@ -1632,19 +1650,18 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) case SPU_RdDec: { - out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp); + u32 out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp); //Polling: We might as well hint to the scheduler to slot in another thread since this one is counting down if (g_cfg.core.spu_loop_detection && out > spu::scheduler::native_jiffy_duration_us) std::this_thread::yield(); - return true; + return out; } case SPU_RdEventMask: { - out = ch_event_mask; - return true; + return ch_event_mask; } case SPU_RdEventStat: @@ -1658,8 +1675,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) if (res) { - out = res; - return true; + return res; } vm::waiter waiter; @@ -1678,22 +1694,20 @@ bool SPUThread::get_ch_value(u32 ch, u32& out) { if (test(state & cpu_flag::stop)) { - return false; + return -1; } thread_ctrl::wait_for(100); } - out = res; - return true; + return res; } case SPU_RdMachStat: { // HACK: "Not isolated" status // Return SPU Interrupt status in LSB - out = interrupts_enabled == true; - return true; + return interrupts_enabled == true; } } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index b4ba66269b..40d4568f9c 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -609,7 +609,7 @@ public: void set_events(u32 mask); void set_interrupt_status(bool enable); u32 get_ch_count(u32 ch); - bool get_ch_value(u32 ch, u32& out); + s64 get_ch_value(u32 ch); bool set_ch_value(u32 ch, u32 value); bool stop_and_signal(u32 code); void halt(); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 9c046470f7..96f6002e21 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -30,6 +30,13 @@ enum class spu_decoder_type llvm, }; +enum class spu_block_size_type +{ + safe, + mega, + giga, +}; + enum class lib_loading_type { automatic, @@ -303,6 +310,7 @@ struct cfg_root : cfg::node cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield cfg::_bool spu_shared_runtime{this, "SPU Shared Runtime", true}; // Share compiled SPU functions between all threads + cfg::_enum spu_block_size{this, "SPU Block Size"}; cfg::_enum lib_loading{this, "Lib Loader", lib_loading_type::liblv2only}; cfg::_bool hook_functions{this, "Hook static functions"};