Initial PPU LLVM implementation for aarch64

This commit is contained in:
parent a5f9256ac6
commit 56cc5d9355
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/InlineAsm.h"

#ifdef _MSC_VER
#pragma warning(pop)
@@ -3898,4 +3899,39 @@ struct fmt_unveil<llvm::TypeSize, void>
    }
};

// Inline assembly wrappers.
// TODO: Move these to a proper location and replace the macros with templates
static inline
llvm::InlineAsm* compile_inline_asm(
    llvm::Type* returnType,
    llvm::ArrayRef<llvm::Type*> argTypes,
    const std::string& code,
    const std::string& constraints)
{
    const auto callSig = llvm::FunctionType::get(returnType, argTypes, false);
    return llvm::InlineAsm::get(callSig, code, constraints, true, false);
}

// Helper for ASM generation with a dynamic number of arguments
#define LLVM_ASM(asm_, args, constraints, irb, ctx)\
    do {\
        std::vector<llvm::Type*> _argTypes;\
        _argTypes.reserve(args.size());\
        for (const auto& _arg : args) _argTypes.push_back(_arg->getType());\
        auto _returnType = llvm::Type::getVoidTy(ctx);\
        llvm::FunctionCallee _callee = compile_inline_asm(_returnType, _argTypes, asm_, constraints);\
        auto _c = irb->CreateCall(_callee, args);\
        _c->addFnAttr(llvm::Attribute::AlwaysInline);\
    } while (0)

// Helper for ASM generation with 0 args
#define LLVM_ASM_0(asm_, irb, ctx)\
    do {\
        const auto _voidTy = llvm::Type::getVoidTy(ctx);\
        auto _callee = compile_inline_asm(_voidTy, std::nullopt, asm_, "");\
        auto _c = irb->CreateCall(_callee);\
        _c->setTailCall();\
        _c->addFnAttr(llvm::Attribute::AlwaysInline);\
    } while (0)

#endif
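For illustration, a usage sketch of these helpers (not part of the commit; the builder pointer m_ir and the operand thread_ptr are assumptions made for the example):

    // Hypothetical usage. Assumes an llvm::IRBuilder<>* m_ir and an
    // llvm::Value* thread_ptr in scope, plus #include <array>.
    llvm::LLVMContext& ctx = m_ir->getContext();

    // Zero-argument form: emit a bare instruction with no operands.
    LLVM_ASM_0("dmb ish", m_ir, ctx);

    // One-operand form: the "m" constraint passes the value as a memory
    // operand, the same way VMEscape below feeds m_thread to "ldr x20, $0".
    std::array args{ thread_ptr };
    LLVM_ASM("ldr x20, $0", args, "m", m_ir, ctx);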

rpcs3/Emu/CPU/Hypervisor.h (new file, 40 lines)
@@ -0,0 +1,40 @@
#pragma once

#include <util/types.hpp>

namespace rpcs3
{
    union alignas(16) hypervisor_context_t
    {
        u64 regs[16];

        struct
        {
            u64 pc;
            u64 sp;

            u64 x18;
            u64 x19;
            u64 x20;
            u64 x21;
            u64 x22;
            u64 x23;
            u64 x24;
            u64 x25;
            u64 x26;
            u64 x27;
            u64 x28;
            u64 x29;
            u64 x30;

            // x0-x17 unused
        } aarch64;

        struct
        {
            u64 sp;

            // Other regs unused
        } x86;
    };
}
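A quick layout check for this union (an illustrative sketch, not part of the commit): the aarch64 view must alias regs[16] exactly, with pc and sp in the first two slots, because the gateway below stores pc at byte offset 0, sp at offset 8, and x30 at offset 112.

    #include <cstddef>

    static_assert(sizeof(rpcs3::hypervisor_context_t) == sizeof(u64[16]));
    static_assert(offsetof(rpcs3::hypervisor_context_t, aarch64.pc) == 0);
    static_assert(offsetof(rpcs3::hypervisor_context_t, aarch64.sp) == 8);
    static_assert(offsetof(rpcs3::hypervisor_context_t, aarch64.x30) == 112);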
@@ -222,7 +222,7 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
#endif

    // Save native stack pointer for longjmp emulation
    c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)), x86::rsp);
    c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);

    // Initialize args
    c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
@@ -291,37 +291,48 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
    // and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
    // for the AArch64 calling convention

    // Save sp for native longjmp emulation
    Label native_sp_offset = c.newLabel();
    c.ldr(a64::x10, arm::Mem(native_sp_offset));
    // sp is not allowed to be used in loads/stores directly
    c.mov(a64::x15, a64::sp);
    c.str(a64::x15, arm::Mem(args[0], a64::x10));

    // Push callee saved registers to the stack
    // Push callee saved registers to the hv context
    // Assume our LLVM-compiled code is unsafe and can clobber our stack. GHC on aarch64 treats the stack as scratch.
    // We also want to store the register context at a fixed place so we can read the hypervisor state from any location.
    // We need to save x18-x30 = 13 x 8B each + 8 bytes for 16B alignment = 112B
    c.sub(a64::sp, a64::sp, Imm(112));
    c.stp(a64::x18, a64::x19, arm::Mem(a64::sp));
    c.stp(a64::x20, a64::x21, arm::Mem(a64::sp, 16));
    c.stp(a64::x22, a64::x23, arm::Mem(a64::sp, 32));
    c.stp(a64::x24, a64::x25, arm::Mem(a64::sp, 48));
    c.stp(a64::x26, a64::x27, arm::Mem(a64::sp, 64));
    c.stp(a64::x28, a64::x29, arm::Mem(a64::sp, 80));
    c.str(a64::x30, arm::Mem(a64::sp, 96));

    // Pre-context save
    // Layout:
    //  pc, sp
    //  x18, x19...x30
    // NOTE: Do not touch x19..x30 before saving the registers!
    const u64 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
    Label hv_ctx_pc = c.newLabel(); // Used to hold the far jump return address

    // Sanity
    ensure(hv_register_array_offset < 4096); // Imm12 (add-immediate encoding limit)

    c.mov(a64::x15, args[0]);
    c.add(a64::x14, a64::x15, Imm(hv_register_array_offset)); // Per-thread context save

    c.adr(a64::x15, hv_ctx_pc); // x15 = pc
    c.mov(a64::x13, a64::sp);   // x13 = sp

    c.stp(a64::x15, a64::x13, arm::Mem(a64::x14));
    c.stp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
    c.stp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
    c.stp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
    c.stp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
    c.stp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
    c.stp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
    c.str(a64::x30, arm::Mem(a64::x14, 112));

    // Load REG_Base - use an absolute jump target to bypass relative jump range limits
    Label exec_addr = c.newLabel();
    c.ldr(a64::x19, arm::Mem(exec_addr));
    c.mov(a64::x19, Imm(reinterpret_cast<u64>(&vm::g_exec_addr)));
    c.ldr(a64::x19, arm::Mem(a64::x19));
    // Load PPUThread struct base -> REG_Sp
    const arm::GpX ppu_t_base = a64::x20;
    c.mov(ppu_t_base, args[0]);
    // Load PC
    const arm::GpX pc = a64::x15;
    Label cia_offset = c.newLabel();
    const arm::GpX cia_addr_reg = a64::x11;
    // Load offset value
    c.ldr(cia_addr_reg, arm::Mem(cia_offset));
    c.mov(cia_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::cia))));
    // Load cia
    c.ldr(a64::w15, arm::Mem(ppu_t_base, cia_addr_reg));
    // Multiply by 2 to index into the ptr table
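A note on the "multiply by 2": it matches an executor table that stores one 8-byte entry per 4-byte PPU instruction, addressed at byte offset cia * 2. A hedged C++ sketch of the lookup the emitted code performs (the entry layout is an assumption inferred from the lsr #16 in the next hunk; u8/u32/u64 are RPCS3's fixed-width aliases):

    #include <cstring>

    inline u64 resolve_call_target_sketch(const u8* exec_base, u32 cia)
    {
        u64 entry = 0;
        std::memcpy(&entry, exec_base + u64{cia} * 2, sizeof(entry));
        return entry >> 16; // upper 48 bits hold the host code pointer
    }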
@@ -343,44 +354,45 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
    c.lsr(call_target, call_target, Imm(16));

    // Load registers
    Label base_addr = c.newLabel();
    c.ldr(a64::x22, arm::Mem(base_addr));
    c.mov(a64::x22, Imm(reinterpret_cast<u64>(&vm::g_base_addr)));
    c.ldr(a64::x22, arm::Mem(a64::x22));

    Label gpr_addr_offset = c.newLabel();
    const arm::GpX gpr_addr_reg = a64::x9;
    c.ldr(gpr_addr_reg, arm::Mem(gpr_addr_offset));
    c.mov(gpr_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::gpr))));
    c.add(gpr_addr_reg, gpr_addr_reg, ppu_t_base);
    c.ldr(a64::x23, arm::Mem(gpr_addr_reg));
    c.ldr(a64::x24, arm::Mem(gpr_addr_reg, 8));
    c.ldr(a64::x25, arm::Mem(gpr_addr_reg, 16));

    // GHC frame for the guest. This seems dodgy, but the only thing stored on the stack is actually registers before making calls to C++ code.
    // Injected stack frames also work, but are not free and are completely unnecessary.
    c.sub(a64::sp, a64::sp, Imm(4096));

    // Execute LLE call
    c.blr(call_target);

    // Restore registers from the stack
    c.ldp(a64::x18, a64::x19, arm::Mem(a64::sp));
    c.ldp(a64::x20, a64::x21, arm::Mem(a64::sp, 16));
    c.ldp(a64::x22, a64::x23, arm::Mem(a64::sp, 32));
    c.ldp(a64::x24, a64::x25, arm::Mem(a64::sp, 48));
    c.ldp(a64::x26, a64::x27, arm::Mem(a64::sp, 64));
    c.ldp(a64::x28, a64::x29, arm::Mem(a64::sp, 80));
    c.ldr(a64::x30, arm::Mem(a64::sp, 96));
    // Restore stack ptr
    c.add(a64::sp, a64::sp, Imm(112));
    // Return
    c.ret(a64::x30);
    // Return address after far jump. Reset sp and start unwinding...
    c.bind(hv_ctx_pc);

    c.bind(exec_addr);
    c.embedUInt64(reinterpret_cast<u64>(&vm::g_exec_addr));
    c.bind(base_addr);
    c.embedUInt64(reinterpret_cast<u64>(&vm::g_base_addr));
    c.bind(cia_offset);
    c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::cia)));
    c.bind(gpr_addr_offset);
    c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::gpr)));
    c.bind(native_sp_offset);
    c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::saved_native_sp)));
    // Execution guard undo (unneeded since we're going to hard-reset the SP)
    //c.add(a64::sp, a64::sp, Imm(4096));

    // We either got here through a normal "ret", which keeps our x20 intact, or we jumped here and the escape reset our x20 reg.
    // Either way, x20 contains our thread base and we forcefully reset the stack pointer.
    c.add(a64::x14, a64::x20, Imm(hv_register_array_offset)); // Per-thread context restore

    c.ldr(a64::x15, arm::Mem(a64::x14, 8));
    c.ldp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
    c.ldp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
    c.ldp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
    c.ldp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
    c.ldp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
    c.ldp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
    c.ldr(a64::x30, arm::Mem(a64::x14, 112));

    // Return
    c.mov(a64::sp, a64::x15);
    c.ret(a64::x30);
#endif
});
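To summarize the two exits above (a sketch under assumptions, not the emitted code): both the fall-through ret path and the far-jump landing pad at hv_ctx_pc leave the thread base in x20, so the epilogue can always rebuild its state from the fixed hv_ctx block.

    // C++ mirror of what the landing pad reads back (illustrative only).
    inline void read_landing_pad_state(const rpcs3::hypervisor_context_t& hv, u64& native_sp, u64& return_pc)
    {
        native_sp = hv.aarch64.sp;  // -> c.mov(a64::sp, a64::x15)
        return_pc = hv.aarch64.x30; // -> c.ret(a64::x30), the gateway's caller
        // x18..x29 are reloaded from hv.aarch64 in the same pass
    }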
@@ -390,11 +402,20 @@ const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_esc

#if defined(ARCH_X64)
    // Restore native stack pointer (longjmp emulation)
    c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)));
    c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));

    // Return to the return location
    c.sub(x86::rsp, 8);
    c.ret();
#else
    // We really shouldn't be using this, but an implementation shouldn't hurt
    // Far jump return. Only clobbers x30.
    const arm::GpX ppu_t_base = a64::x20;
    const u64 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
    c.mov(ppu_t_base, args[0]);
    c.mov(a64::x30, Imm(hv_register_array_offset));
    c.ldr(a64::x30, arm::Mem(ppu_t_base, a64::x30));
    c.ret(a64::x30);
#endif
});
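Worth spelling out: on AArch64 this escape is not a return but an indirect branch. hv_ctx.regs[0] is the pc slot written by the gateway prologue, so loading it into x30 and issuing ret lands on the hv_ctx_pc pad above. A minimal sketch (illustrative):

    inline u64 escape_branch_target(const rpcs3::hypervisor_context_t& hv)
    {
        return hv.regs[0]; // == hv.aarch64.pc, set by c.adr(a64::x15, hv_ctx_pc)
    }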
@@ -2265,6 +2286,9 @@ void ppu_thread::exec_task()
{
    if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
    {
        // HVContext push to allow recursion. This happens with guest callback invocations.
        const auto old_hv_ctx = hv_ctx;

        while (true)
        {
            if (state) [[unlikely]]
@@ -2276,6 +2300,8 @@ void ppu_thread::exec_task()
            ppu_gateway(this);
        }

        // HVContext pop
        hv_ctx = old_hv_ctx;
        return;
    }
@@ -2314,6 +2340,8 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3
{
    prio.raw().prio = _prio;

    memset(&hv_ctx, 0, sizeof(hv_ctx));

    gpr[1] = stack_addr + stack_size - ppu_stack_start_offset;

    gpr[13] = param.tls_addr;
@@ -3502,7 +3530,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)

    if (notify)
    {
        bool notified = false;

        if (ppu.res_notify_time == (vm::reservation_acquire(notify) & -128))
        {
@@ -5277,12 +5305,14 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
    // Translate
    if (const auto func = translator.Translate(module_part.funcs[fi]))
    {
#ifdef ARCH_X64 // TODO
        // Run optimization passes
#if LLVM_VERSION_MAJOR < 17
        pm.run(*func);
#else
        fpm.run(*func, fam);
#endif
#endif // ARCH_X64
    }
    else
    {
@@ -5297,12 +5327,14 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
    if (const auto func = translator.GetSymbolResolver(whole_module))
    {
#ifdef ARCH_X64 // TODO
        // Run optimization passes
#if LLVM_VERSION_MAJOR < 17
        pm.run(*func);
#else
        fpm.run(*func, fam);
#endif
#endif // ARCH_X64
    }
    else
    {
@@ -1,6 +1,7 @@
#pragma once

#include "../CPU/CPUThread.h"
#include "../CPU/Hypervisor.h"
#include "../Memory/vm_ptr.h"
#include "Utilities/lockless.h"
#include "Utilities/BitField.h"
@@ -163,6 +164,9 @@ public:

    using cpu_thread::operator=;

    // Hypervisor context data
    alignas(16) rpcs3::hypervisor_context_t hv_ctx; // HV context for gate enter/exit. Keep at a low struct offset.

    u64 gpr[32] = {}; // General-Purpose Registers
    f64 fpr[32] = {}; // Floating Point Registers
    v128 vr[32] = {}; // Vector Registers
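The "low struct offset" requirement ties back to the gateway: the AArch64 prologue materializes the context address with a single add-immediate, whose encoding carries 12 bits, hence the ensure(offset < 4096) guard in ppu_gateway. A hypothetical compile-time form of the same guard (offsetof on a non-standard-layout type is only conditionally supported, so treat this as a sketch):

    #include <cstddef>

    static_assert(offsetof(ppu_thread, hv_ctx) < 4096,
        "hv_ctx must stay reachable by one AArch64 add-immediate");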
@@ -208,8 +208,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
    m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease);

    // Create tail call to the check function
    Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
    m_ir->CreateRetVoid();
    VMEscape(Call(GetType<void>(), "__check", m_thread, GetAddr()));
}
else
{
@@ -321,7 +320,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
    if (vec_addrs.empty())
    {
        // Possible special case for no functions (allowing the do-while optimization)
        m_ir->CreateRetVoid();
        m_ir->CreateRetVoid(); // FIXME: AArch64. It should work fine as long as there is no call chain beyond this function with a ret path.
        replace_intrinsics(*m_function);
        return m_function;
    }
@@ -378,7 +377,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
    // Set insertion point to afterloop_block
    m_ir->SetInsertPoint(after_loop);

    m_ir->CreateRetVoid();
    m_ir->CreateRetVoid(); // FIXME: AArch64 - should be OK as long as no ret-based call chain proceeds from here

    replace_intrinsics(*m_function);
    return m_function;
@@ -482,8 +481,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)

    if (_target >= u32{umax})
    {
        Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
        m_ir->CreateRetVoid();
        auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
        VMEscape(c);
        return;
    }
    else if (_target >= caddr && _target <= cend)
@@ -565,7 +564,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
    const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
    c->setTailCallKind(llvm::CallInst::TCK_Tail);
    c->setCallingConv(CallingConv::GHC);
    m_ir->CreateRetVoid();
    VMEscape(c);
}

Value* PPUTranslator::RegInit(Value*& local)
@@ -779,8 +778,8 @@ void PPUTranslator::TestAborted()
    m_ir->SetInsertPoint(vcheck);

    // Create tail call to the check function
    Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
    m_ir->CreateRetVoid();
    auto c = Call(GetType<void>(), "__check", m_thread, GetAddr());
    VMEscape(c);
    m_ir->SetInsertPoint(body);
}
@@ -2206,16 +2205,14 @@ void PPUTranslator::SC(ppu_opcode_t op)

    if (index < 1024)
    {
        Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
        //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
        m_ir->CreateRetVoid();
        auto c = Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
        VMEscape(c, true);
        return;
    }
}

Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
auto c = Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
VMEscape(c, true);
}

void PPUTranslator::B(ppu_opcode_t op)
@@ -2776,9 +2773,9 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
{
    RegStore(Trunc(GetAddr()), m_cia);
    FlushRegisters();
    Call(GetType<void>(), "__resinterp", m_thread);
    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
    m_ir->CreateRetVoid();

    auto inst = Call(GetType<void>(), "__resinterp", m_thread);
    VMEscape(inst, true);
    return;
}
@@ -2928,9 +2925,9 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
{
    RegStore(Trunc(GetAddr()), m_cia);
    FlushRegisters();
    Call(GetType<void>(), "__resinterp", m_thread);
    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
    m_ir->CreateRetVoid();

    auto inst = Call(GetType<void>(), "__resinterp", m_thread);
    VMEscape(inst, true);
    return;
}
@@ -4998,9 +4995,8 @@ void PPUTranslator::FCFID(ppu_opcode_t op)
void PPUTranslator::UNK(ppu_opcode_t op)
{
    FlushRegisters();
    Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
    m_ir->CreateRetVoid();
    auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
    VMEscape(c, true);
}
@@ -5279,9 +5275,8 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)

void PPUTranslator::Trap()
{
    Call(GetType<void>(), "__trap", m_thread, GetAddr());
    //Call(GetType<void>(), "__escape", m_thread)->setTailCall();
    m_ir->CreateRetVoid();
    auto c = Call(GetType<void>(), "__trap", m_thread, GetAddr());
    VMEscape(c);
}

Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
@@ -5328,6 +5323,42 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
    return nullptr;
}

void PPUTranslator::VMEscape([[maybe_unused]] llvm::CallInst* tail_call, [[maybe_unused]] bool skip_flush)
{
    //if (!skip_flush)
    {
        // Flush
        FlushRegisters();
    }

#ifdef ARCH_X64
    // Optionally flag the last call as a tail call
    if (tail_call)
    {
        tail_call->setTailCall();
    }

    // This is actually AMD64-specific, but good enough for now
    m_ir->CreateRetVoid();
#else

    // Validation. Make sure we're escaping from a correct context. Only guest JIT should ever go through the "escape" gate.
    const auto bb = m_ir->GetInsertBlock();
    const auto arg = llvm::dyn_cast<llvm::Argument>(m_thread);
    ensure(bb->getParent()->getName().str() == arg->getParent()->getName().str());

    const u32 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
    const std::string asm_ = fmt::format(
        "ldr x20, $0;\n"
        "ldr x30, [x20, #%u];\n",
        hv_register_array_offset);

    LLVM_ASM(asm_, std::array{ m_thread }, "m", m_ir, m_function->getContext());
    m_ir->CreateRetVoid();

#endif
}
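Putting VMEscape together: on x86-64 a plain ret suffices because the guest preserves the native call stack, while on AArch64 the GHC guest treats the stack as scratch, so the escape patches x30 from the saved hypervisor context and "returns" into the gateway's landing pad. The usage pattern, as seen in SC/UNK/Trap above:

    // Make the external call, then escape to the hypervisor gateway
    // instead of continuing in guest code.
    auto c = Call(GetType<void>(), "__trap", m_thread, GetAddr());
    VMEscape(c); // flushes registers; marks the call as a tail call on
                 // x86-64; emits the far-jump return sequence on AArch64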
void PPUTranslator::build_interpreter()
{
#define BUILD_VEC_INST(i) { \
@@ -5343,8 +5374,7 @@ void PPUTranslator::build_interpreter()
    op.vb = 2; \
    op.vc = 3; \
    this->i(op); \
    FlushRegisters(); \
    m_ir->CreateRetVoid(); \
    VMEscape(); \
    replace_intrinsics(*m_function); \
}
@@ -150,6 +150,9 @@ public:
    // Emit function call
    void CallFunction(u64 target, llvm::Value* indirect = nullptr);

    // Emit escape sequence back to hypervisor
    void VMEscape(llvm::CallInst* tail_call = nullptr, bool skip_flush = false);

    // Emit state check mid-block
    void TestAborted();