SPU ASMJIT: internal jumptable

Allow indirect calls within current function using a jumptable
This restores some functionality removed in SPU ASMJIT 2.0
Change SPUThread::get_ch_value prototype
This commit is contained in:
Nekotekina 2018-04-30 19:39:06 +03:00
parent df453d6d4f
commit 767dfa271e
7 changed files with 302 additions and 229 deletions

View File

@ -133,12 +133,8 @@ bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag)
if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD)
{
u32 rv;
spu.get_ch_value(MFC_RdAtomicStat, rv);
auto success = rv ? true : false;
success = cmd == MFC_PUTLLC_CMD ? !success : success;
return success;
const u32 rv = spu.get_ch_value(MFC_RdAtomicStat);
return cmd == MFC_PUTLLC_CMD ? !rv : true;
}
return true;
@ -149,7 +145,7 @@ u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask)
{
spu.set_ch_value(MFC_WrTagMask, tagMask);
spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_IMMEDIATE);
u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv;
return spu.get_ch_value(MFC_RdTagStat);
}
// Wait for DMA operations to complete
@ -157,7 +153,7 @@ u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll)
{
spu.set_ch_value(MFC_WrTagMask, tagMask);
spu.set_ch_value(MFC_WrTagUpdate, waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY);
u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv;
return spu.get_ch_value(MFC_RdTagStat);
}
// Halt the SPU
@ -168,16 +164,15 @@ void spursHalt(SPUThread& spu)
void sys_spu_thread_exit(SPUThread& spu, s32 status)
{
u32 _v;
// Cancel any pending status update requests
spu.set_ch_value(MFC_WrTagUpdate, 0);
while (spu.get_ch_count(MFC_RdTagStat) != 1);
spu.get_ch_value(MFC_RdTagStat, _v);
spu.get_ch_value(MFC_RdTagStat);
// Wait for all pending DMA operations to complete
spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
spu.get_ch_value(MFC_RdTagStat, _v);
spu.get_ch_value(MFC_RdTagStat);
spu.set_ch_value(SPU_WrOutMbox, status);
spu.stop_and_signal(0x102);
@ -185,16 +180,15 @@ void sys_spu_thread_exit(SPUThread& spu, s32 status)
void sys_spu_thread_group_exit(SPUThread& spu, s32 status)
{
u32 _v;
// Cancel any pending status update requests
spu.set_ch_value(MFC_WrTagUpdate, 0);
while (spu.get_ch_count(MFC_RdTagStat) != 1);
spu.get_ch_value(MFC_RdTagStat, _v);
spu.get_ch_value(MFC_RdTagStat);
// Wait for all pending DMA operations to complete
spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
spu.get_ch_value(MFC_RdTagStat, _v);
spu.get_ch_value(MFC_RdTagStat);
spu.set_ch_value(SPU_WrOutMbox, status);
spu.stop_and_signal(0x101);
@ -214,9 +208,7 @@ s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1)
spu.set_ch_value(SPU_WrOutMbox, data1);
spu.set_ch_value(SPU_WrOutIntrMbox, (spup << 24) | (data0 & 0x00FFFFFF));
spu.get_ch_value(SPU_RdInMbox, data0);
return data0;
return static_cast<u32>(spu.get_ch_value(SPU_RdInMbox));
}
s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
@ -231,18 +223,18 @@ s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
// Cancel any pending status update requests
spu.set_ch_value(MFC_WrTagUpdate, 0);
while (spu.get_ch_count(MFC_RdTagStat) != 1);
spu.get_ch_value(MFC_RdTagStat, result);
spu.get_ch_value(MFC_RdTagStat);
// Wait for all pending DMA operations to complete
spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
spu.get_ch_value(MFC_RdTagStat, result);
spu.get_ch_value(MFC_RdTagStat);
do
{
spu.set_ch_value(SPU_WrOutMbox, status);
spu.stop_and_signal(0x120);
spu.get_ch_value(SPU_RdInMbox, result);
result = spu.get_ch_value(SPU_RdInMbox);
}
while (result == CELL_EBUSY);
@ -1719,9 +1711,8 @@ s32 spursTasketSaveTaskContext(SPUThread& spu)
v128 r;
spu.fpscr.Read(r);
ctxt->savedContextFpscr = r;
u32 r32;
spu.get_ch_value(SPU_RdEventMask, r32); ctxt->savedSpuWriteEventMask = r32;
spu.get_ch_value(MFC_RdTagMask, r32); ctxt->savedWriteTagGroupQueryMask = r32;
ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask);
ctxt->savedWriteTagGroupQueryMask = spu.get_ch_value(MFC_RdTagMask);
// Store the processor context
const u32 contextSaveStorage = vm::cast(taskInfo->context_save_storage_and_alloc_ls_blocks & -0x80, HERE);

View File

@ -152,7 +152,7 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
vec[i] = vec_vars[i];
}
Label label_stop = c->newLabel();
label_stop = c->newLabel();
Label label_diff = c->newLabel();
Label label_code = c->newLabel();
std::vector<u32> words;
@ -163,6 +163,15 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
const u32 start = m_pos;
const u32 end = m_pos + (func.size() - 1) * 4;
// Create instruction labels (TODO: some of them are unnecessary)
for (u32 i = 1; i < func.size(); i++)
{
if (func[i])
{
instr_labels[i * 4 - 4 + m_pos] = c->newLabel();
}
}
// Set PC and check status
c->mov(SPU_OFF_32(pc), m_pos);
c->cmp(SPU_OFF_32(state), 0);
@ -713,6 +722,14 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
// Update position
m_pos = pos;
// Bind instruction label if necessary
const auto found = instr_labels.find(pos);
if (found != instr_labels.end())
{
c->bind(found->second);
}
// Execute recompiler function
(this->*s_spu_decoder.decode(op))({op});
@ -750,6 +767,27 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
work();
}
// Build instruction dispatch table
if (instr_table.isValid())
{
c->align(kAlignData, 8);
c->bind(instr_table);
for (u32 addr = start; addr < end; addr += 4)
{
const auto found = instr_labels.find(addr);
if (found != instr_labels.end())
{
c->embedLabel(found->second);
}
else
{
c->embedLabel(label_stop);
}
}
}
c->align(kAlignData, words_align);
c->bind(label_code);
for (u32 d : words)
@ -760,6 +798,9 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
work();
}
label_stop.reset();
instr_table.reset();
instr_labels.clear();
xmm_consts.clear();
// Compile and get function address
@ -1066,6 +1107,28 @@ void spu_recompiler::branch_fixed(u32 target)
{
using namespace asmjit;
// Check local branch
const auto local = instr_labels.find(target);
if (local != instr_labels.end() && local->second.isValid())
{
c->cmp(SPU_OFF_32(state), 0);
c->jz(local->second);
c->mov(SPU_OFF_32(pc), target);
c->ret();
return;
}
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{
// Don't generate patch points in this mode
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->mov(SPU_OFF_32(pc), target);
c->xor_(qw0->r32(), qw0->r32());
c->jmp(x86::rax);
return;
}
// Set patch address as a third argument and fallback to it
Label patch_point = c->newLabel();
c->lea(*qw0, x86::qword_ptr(patch_point));
@ -1109,9 +1172,23 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
{
using namespace asmjit;
// Load indirect jump address
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
if (!instr_table.isValid())
{
// Request instruction table
instr_table = c->newLabel();
}
const u32 start = instr_labels.begin()->first;
const u32 end = instr_labels.rbegin()->first + 4;
// Load indirect jump address, choose between local and external
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->lea(*qw1, x86::qword_ptr(*addr, 0 - start));
c->xor_(qw0->r32(), qw0->r32());
c->cmp(qw1->r32(), end - start);
c->cmovae(qw1->r32(), qw0->r32());
c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
if (op.d)
{
@ -1119,22 +1196,34 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
}
else if (op.e)
{
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
c->mov(x86::r9d, SPU_OFF_32(ch_event_stat));
c->and_(x86::r9d, SPU_OFF_32(ch_event_mask));
c->and_(x86::r9d, SPU_EVENT_INTR_TEST);
c->cmp(x86::r9d, 0);
Label no_intr = c->newLabel();
Label intr = c->newLabel();
Label fail = c->newLabel();
Label noInterrupt = c->newLabel();
c->je(noInterrupt);
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask));
c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
c->jnz(fail);
c->and_(qw1->r32(), SPU_OFF_32(ch_event_stat));
c->test(qw1->r32(), SPU_EVENT_INTR_IMPLEMENTED);
c->jnz(intr);
c->jmp(no_intr);
c->bind(fail);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(addr->r64(), reinterpret_cast<u64>(vm::base(0xffdead00)));
c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32);
c->bind(intr);
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_32(srr0), *addr);
branch_fixed(0);
c->mov(*addr, qw0->r32());
c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher)));
c->align(kAlignCode, 16);
c->bind(noInterrupt);
c->bind(no_intr);
}
c->mov(SPU_OFF_32(pc), *addr);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_stop);
c->jmp(x86::r10);
}
@ -1348,25 +1437,33 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
}
static void spu_rdch_ret(SPUThread& spu, void*, u32)
{
// MSVC workaround (TCO)
}
static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32))
{
const s64 result = _spu->get_ch_value(ch);
if (result < 0)
{
_ret = &spu_rdch_ret;
}
// Return channel value in the third argument
_ret(*_spu, _spu->_ptr<u8>(0), static_cast<u32>(result));
}
void spu_recompiler::RDCH(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, v128* out)
{
u32 value;
if (_spu->get_ch_value(ch, value))
{
*out = v128::from32r(value);
_spu->pc += 4;
}
};
auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(addr->r64(), channel_ptr);
c->xor_(qw0->r32(), qw0->r32());
c->align(kAlignCode, 16);
@ -1376,12 +1473,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
});
if (sync)
@ -1396,10 +1492,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->mov(channel_ptr, *qw0);
}
const XmmLink& vr = XmmAlloc();
c->movd(vr, *addr);
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(qw0->r32(), *addr);
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
};
switch (op.ra)
@ -1415,42 +1512,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
case SPU_RdInMbox:
{
// TODO
Label wait = c->newLabel();
Label next = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->cmp(x86::byte_ptr(*cpu, offset32(&SPUThread::ch_in_mbox) + 1), 0);
c->jz(wait);
after.emplace_back([=]
{
// Do not continue after waiting
c->bind(wait);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
});
auto sub = [](SPUThread* _spu, v128* out, spu_function_t _ret)
{
// Workaround for gcc (TCO)
static thread_local u32 value;
if (!_spu->get_ch_value(SPU_RdInMbox, value))
{
// Workaround for MSVC (TCO)
fmt::raw_error("spu_recompiler::RDCH(): unexpected SPUThread::get_ch_value(SPU_RdInMbox) call");
}
*out = v128::from32r(value);
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
c->lea(*ls, SPU_OFF_128(gpr, op.rt));
c->lea(*qw0, x86::qword_ptr(next));
c->jmp(imm_ptr<void(*)(SPUThread*, v128*, spu_function_t)>(sub));
c->align(kAlignCode, 16);
c->bind(next);
return;
break;
}
case MFC_RdTagStat:
{
@ -1489,7 +1551,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
{
LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos);
auto gate1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@ -1500,7 +1562,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
auto gate2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@ -1514,7 +1576,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->mov(SPU_OFF_32(pc), m_pos);
c->lea(*ls, SPU_OFF_128(gpr, op.rt));
c->lea(*qw0, asmjit::x86::qword_ptr(next));
c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(gate1) : asmjit::imm_ptr<ftype>(gate2));
c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(sub1) : asmjit::imm_ptr<ftype>(sub2));
c->align(asmjit::kAlignCode, 16);
c->bind(next);
return;
@ -1532,22 +1594,23 @@ void spu_recompiler::RDCH(spu_opcode_t op)
LOG_WARNING(SPU, "[0x%x] RDCH: RdEventStat", m_pos);
get_events();
Label wait = c->newLabel();
Label ret = c->newLabel();
c->jz(wait);
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
});
const XmmLink& vr = XmmAlloc();
c->movd(vr, *addr);
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(qw0->r32(), *addr);
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
return;
}
case SPU_RdMachStat:
@ -1561,23 +1624,28 @@ void spu_recompiler::RDCH(spu_opcode_t op)
}
}
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
m_pos = -1;
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
}
static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res))
{
// Put result into the third argument
const u32 res = _spu->get_ch_count(ch);
_ret(*_spu, _spu->_ptr<u8>(0), res);
}
void spu_recompiler::RCHCNT(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, v128* out)
{
*out = v128::from32r(_spu->get_ch_count(ch));
_spu->pc += 4;
};
auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
{
// Load channel count
@ -1638,21 +1706,26 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
{
LOG_WARNING(SPU, "[0x%x] RCHCNT: RdEventStat", m_pos);
get_events();
c->setnz(addr->r8());
c->movzx(*addr, addr->r8());
c->movd(x86::xmm0, *addr);
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
return;
c->setnz(qw0->r8());
c->movzx(qw0->r32(), qw0->r8());
break;
}
default:
{
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rchcnt));
c->bind(ret);
break;
}
}
// Non-returnable fallback for unsupported events
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
m_pos = -1;
// Use result from the third argument
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
}
void spu_recompiler::SF(spu_opcode_t op)
@ -2310,18 +2383,35 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
// Check SPUInterpreter for notes.
}
static void spu_wrch_ret(SPUThread& _spu, void*, u8*)
{
// MSVC workaround (TCO)
}
static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
{
if (!_spu->set_ch_value(ch, value))
{
_ret = &spu_wrch_ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret)
{
if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
{
_ret = &spu_wrch_ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
void spu_recompiler::WRCH(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, u32 value)
{
if (_spu->set_ch_value(ch, value))
{
_spu->pc += 4;
}
};
switch (op.ra)
{
case SPU_WrSRR0:
@ -2332,42 +2422,14 @@ void spu_recompiler::WRCH(spu_opcode_t op)
}
case SPU_WrOutIntrMbox:
{
auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
{
if (!_spu->set_ch_value(SPU_WrOutIntrMbox, value))
{
fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(SPU_WrOutIntrMbox) call");
}
// Continue
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
Label ret = c->newLabel();
Label wait = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->bt(SPU_OFF_64(ch_out_intr_mbox), spu_channel::off_count);
c->jc(wait);
after.emplace_back([=]
{
// Do not continue after waiting
c->bind(wait);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
});
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
c->align(kAlignCode, 16);
c->bind(ret);
return;
// Can't seemingly be optimized
break;
}
case SPU_WrOutMbox:
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
c->align(kAlignCode, 16);
@ -2378,16 +2440,17 @@ void spu_recompiler::WRCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->bts(*qw0, spu_channel::off_count);
c->lock().cmpxchg(SPU_OFF_64(ch_out_mbox), *qw0);
c->jnz(again);
c->bind(ret);
return;
}
case MFC_WrTagMask:
@ -2401,21 +2464,11 @@ void spu_recompiler::WRCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
{
if (!_spu->set_ch_value(MFC_WrTagMask, value))
{
fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(MFC_WrTagMask) call");
}
// Continue
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
c->bind(upd);
c->mov(SPU_OFF_32(pc), pos);
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
c->lea(ls->r32(), MFC_WrTagMask);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->bind(ret);
@ -2434,8 +2487,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
c->bind(zero);
c->mov(SPU_OFF_32(ch_tag_upd), qw0->r32());
@ -2496,22 +2550,12 @@ void spu_recompiler::WRCH(spu_opcode_t op)
case MFC_Cmd:
{
// TODO
auto sub = [](SPUThread* _spu, spu_function_t _ret)
{
if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
{
throw cpu_flag::ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
Label ret = c->newLabel();
c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(SPU_OFF_8(ch_mfc_cmd, &spu_mfc_cmd::cmd), addr->r8());
c->mov(SPU_OFF_32(pc), m_pos);
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
c->jmp(imm_ptr(spu_wrch_mfc));
c->align(kAlignCode, 16);
c->bind(ret);
return;
@ -2555,6 +2599,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
case SPU_WrEventMask:
{
Label fail = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(*addr, ~SPU_EVENT_IMPLEMENTED);
c->mov(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
@ -2567,16 +2612,19 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->mov(SPU_OFF_32(ch_event_mask), qw0->r32());
c->bind(ret);
return;
}
case SPU_WrEventAck:
{
Label fail = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->test(qw0->r32(), ~SPU_EVENT_IMPLEMENTED);
c->jnz(fail);
@ -2585,8 +2633,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->not_(qw0->r32());
@ -2599,11 +2648,13 @@ void spu_recompiler::WRCH(spu_opcode_t op)
}
}
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->mov(ls->r32(), op.ra);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
m_pos = -1;
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
c->bind(ret);
}
void spu_recompiler::BIZ(spu_opcode_t op)

View File

@ -59,6 +59,15 @@ private:
std::vector<std::function<void()>> after;
std::vector<std::function<void()>> consts;
// Function return label
asmjit::Label label_stop;
// Indirect branch dispatch table
asmjit::Label instr_table;
// All valid instruction labels
std::map<u32, asmjit::Label> instr_labels;
// All emitted 128-bit consts
std::map<std::pair<u64, u64>, asmjit::Label> xmm_consts;

View File

@ -94,14 +94,14 @@ bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op)
bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
{
u32 result;
const s64 result = spu.get_ch_value(op.ra);
if (!spu.get_ch_value(op.ra, result))
if (result < 0)
{
return false;
}
spu.gpr[op.rt] = v128::from32r(result);
spu.gpr[op.rt] = v128::from32r(static_cast<u32>(result));
return true;
}

View File

@ -119,6 +119,22 @@ void fmt_class_string<spu_decoder_type>::format(std::string& out, u64 arg)
});
}
template <>
void fmt_class_string<spu_block_size_type>::format(std::string& out, u64 arg)
{
format_enum(out, arg, [](spu_block_size_type type)
{
switch (type)
{
case spu_block_size_type::safe: return "Safe";
case spu_block_size_type::mega: return "Mega";
case spu_block_size_type::giga: return "Giga";
}
return unknown;
});
}
namespace spu
{
namespace scheduler
@ -1497,11 +1513,11 @@ u32 SPUThread::get_ch_count(u32 ch)
fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???");
}
bool SPUThread::get_ch_value(u32 ch, u32& out)
s64 SPUThread::get_ch_value(u32 ch)
{
LOG_TRACE(SPU, "get_ch_value(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???");
auto read_channel = [&](spu_channel& channel)
auto read_channel = [&](spu_channel& channel) -> s64
{
for (int i = 0; i < 10 && channel.get_count() == 0; i++)
{
@ -1515,25 +1531,26 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
}
}
u32 out;
while (!channel.try_pop(out))
{
if (test(state, cpu_flag::stop))
{
return false;
return -1;
}
thread_ctrl::wait();
}
return true;
return out;
};
switch (ch)
{
case SPU_RdSRR0:
{
out = srr0;
return true;
return srr0;
}
case SPU_RdInMbox:
{
@ -1551,6 +1568,8 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
}
}
u32 out;
if (const uint old_count = ch_in_mbox.try_pop(out))
{
if (old_count == 4 /* SPU_IN_MBOX_THRESHOLD */) // TODO: check this
@ -1558,12 +1577,12 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT);
}
return true;
return out;
}
if (test(state & cpu_flag::stop))
{
return false;
return -1;
}
thread_ctrl::wait();
@ -1579,9 +1598,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
if (ch_tag_stat.get_count())
{
out = ch_tag_stat.get_value();
u32 out = ch_tag_stat.get_value();
ch_tag_stat.set_value(0, false);
return true;
return out;
}
// Will stall infinitely
@ -1590,8 +1609,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
case MFC_RdTagMask:
{
out = ch_tag_mask;
return true;
return ch_tag_mask;
}
case SPU_RdSigNotify1:
@ -1608,9 +1626,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
{
if (ch_atomic_stat.get_count())
{
out = ch_atomic_stat.get_value();
u32 out = ch_atomic_stat.get_value();
ch_atomic_stat.set_value(0, false);
return true;
return out;
}
// Will stall infinitely
@ -1621,9 +1639,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
{
if (ch_stall_stat.get_count())
{
out = ch_stall_stat.get_value();
u32 out = ch_stall_stat.get_value();
ch_stall_stat.set_value(0, false);
return true;
return out;
}
// Will stall infinitely
@ -1632,19 +1650,18 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
case SPU_RdDec:
{
out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp);
u32 out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp);
//Polling: We might as well hint to the scheduler to slot in another thread since this one is counting down
if (g_cfg.core.spu_loop_detection && out > spu::scheduler::native_jiffy_duration_us)
std::this_thread::yield();
return true;
return out;
}
case SPU_RdEventMask:
{
out = ch_event_mask;
return true;
return ch_event_mask;
}
case SPU_RdEventStat:
@ -1658,8 +1675,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
if (res)
{
out = res;
return true;
return res;
}
vm::waiter waiter;
@ -1678,22 +1694,20 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
{
if (test(state & cpu_flag::stop))
{
return false;
return -1;
}
thread_ctrl::wait_for(100);
}
out = res;
return true;
return res;
}
case SPU_RdMachStat:
{
// HACK: "Not isolated" status
// Return SPU Interrupt status in LSB
out = interrupts_enabled == true;
return true;
return interrupts_enabled == true;
}
}

View File

@ -609,7 +609,7 @@ public:
void set_events(u32 mask);
void set_interrupt_status(bool enable);
u32 get_ch_count(u32 ch);
bool get_ch_value(u32 ch, u32& out);
s64 get_ch_value(u32 ch);
bool set_ch_value(u32 ch, u32 value);
bool stop_and_signal(u32 code);
void halt();

View File

@ -30,6 +30,13 @@ enum class spu_decoder_type
llvm,
};
enum class spu_block_size_type
{
safe,
mega,
giga,
};
enum class lib_loading_type
{
automatic,
@ -303,6 +310,7 @@ struct cfg_root : cfg::node
cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield
cfg::_bool spu_shared_runtime{this, "SPU Shared Runtime", true}; // Share compiled SPU functions between all threads
cfg::_enum<spu_block_size_type> spu_block_size{this, "SPU Block Size"};
cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::liblv2only};
cfg::_bool hook_functions{this, "Hook static functions"};