Complete PPU support
commit cba658baba
parent 34549445a8
@@ -2,6 +2,21 @@
 #include "AArch64JIT.h"
+#include "../Hypervisor.h"
+
+LOG_CHANNEL(jit_log, "JIT");
+
+#define STDOUT_DEBUG
+
+#ifndef STDOUT_DEBUG
+#define DPRINT jit_log.trace
+#else
+#define DPRINT(...)\
+    do {\
+        printf(__VA_ARGS__);\
+        printf("\n");\
+        fflush(stdout);\
+    } while (0)
+#endif

 namespace aarch64
 {
     // FIXME: This really should be part of fmt
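A quick sketch of what the DPRINT macro above expands to (the call is illustrative, not from this commit):

    // With STDOUT_DEBUG defined (as committed), DPRINT prints to stdout and flushes
    // immediately, so trace output survives a crash that would drop buffered logs:
    DPRINT("patching %s", name.c_str()); // printf(...); printf("\n"); fflush(stdout);
    // With STDOUT_DEBUG undefined, the same call routes through the JIT log channel:
    // jit_log.trace("patching %s", name.c_str());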
@@ -23,11 +38,11 @@ namespace aarch64
     using function_info_t = GHC_frame_preservation_pass::function_info_t;

     GHC_frame_preservation_pass::GHC_frame_preservation_pass(
         gpr base_reg,
         u32 hv_ctx_offset,
+        const std::vector<std::pair<std::string, gpr>>& base_register_lookup,
         std::function<bool(const std::string&)> exclusion_callback)
     {
         execution_context.base_register = base_reg;
+        execution_context.base_register_lookup = base_register_lookup;
         execution_context.hypervisor_context_offset = hv_ctx_offset;
         this->exclusion_callback = exclusion_callback;
     }
@@ -118,6 +133,13 @@ namespace aarch64
         instruction_info_t result{};
         if (auto ci = llvm::dyn_cast<llvm::CallInst>(i))
         {
+            // Watch out for injected ASM blocks...
+            if (llvm::isa<llvm::InlineAsm>(ci->getCalledOperand()))
+            {
+                // Not a real call. This is just an insert of inline asm
+                return result;
+            }
+
             result.is_call_inst = true;
             result.is_returning = true;
             result.preserve_stack = !ci->isTailCall();
@@ -126,12 +148,15 @@ namespace aarch64

             if (!result.callee)
             {
+                // TODO: What are these? Patchpoints, maybe? Need to check again.
+                result.is_call_inst = f.getName() == "__spu-null";
                 // Indirect call (call from raw value).
+                result.is_indirect = true;
                 result.callee_is_GHC = ci->getCallingConv() == llvm::CallingConv::GHC;
                 result.callee_name = "__indirect_call";
             }
             else
             {
                 result.callee_is_GHC = result.callee->getCallingConv() == llvm::CallingConv::GHC;
                 result.callee_name = result.callee->getName().str();
             }
             return result;
         }
@@ -145,7 +170,8 @@ namespace aarch64
             auto targetbb = bi->getSuccessor(0);

             result.callee = targetbb->getParent();
-            result.is_call_inst = result.callee->getName() != f.getName();
+            result.callee_name = result.callee->getName().str();
+            result.is_call_inst = result.callee_name != f.getName();
         }

         return result;
@@ -155,10 +181,11 @@ namespace aarch64
         {
             // Very unlikely to be the same function. Can be considered a function exit.
             ensure(bi->getNumDestinations() == 1);
-            auto targetbb = bi->getSuccessor(0);
+            auto targetbb = ensure(bi->getSuccessor(0)); // This is guaranteed to fail, but I've yet to encounter it

             result.callee = targetbb->getParent();
-            result.is_call_inst = result.callee->getName() != f.getName();
+            result.callee_name = result.callee->getName().str();
+            result.is_call_inst = result.callee_name != f.getName();
             return result;
         }
@@ -168,7 +195,8 @@ namespace aarch64
             auto targetbb = bi->getSuccessor(0);

             result.callee = targetbb->getParent();
-            result.is_call_inst = result.callee->getName() != f.getName();
+            result.callee_name = result.callee->getName().str();
+            result.is_call_inst = result.callee_name != f.getName();
             return result;
         }
@@ -178,13 +206,29 @@ namespace aarch64
             auto targetbb = bi->getSuccessor(0);

             result.callee = targetbb->getParent();
-            result.is_call_inst = result.callee->getName() != f.getName();
+            result.callee_name = result.callee->getName().str();
+            result.is_call_inst = result.callee_name != f.getName();
             return result;
         }

         return result;
     }

+    gpr GHC_frame_preservation_pass::get_base_register_for_call(const std::string& callee_name)
+    {
+        // We go over the base_register_lookup table and find the first matching pattern
+        for (const auto& pattern : execution_context.base_register_lookup)
+        {
+            if (callee_name.starts_with(pattern.first))
+            {
+                return pattern.second;
+            }
+        }
+
+        // Default is x19
+        return aarch64::x19;
+    }
+
     void GHC_frame_preservation_pass::run(llvm::IRBuilder<>* irb, llvm::Function& f)
     {
         if (f.getCallingConv() != llvm::CallingConv::GHC)
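A worked example of the prefix lookup above (the table entries are the ones PPUTranslator registers further down; the callee names are hypothetical):

    // base_register_lookup = { { "__0x", x20 }, { "ppu_", x19 } }
    // get_base_register_for_call("__0x8201F0")  -> x20 (first matching prefix)
    // get_base_register_for_call("ppu_gateway") -> x19
    // get_base_register_for_call("memcpy")      -> x19 (no match, default)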
@@ -200,6 +244,14 @@ namespace aarch64
         }

+        const auto this_name = f.getName().str();
+        if (visited_functions.find(this_name) != visited_functions.end())
+        {
+            // Already processed. Only useful when recursing, which is currently not used.
+            DPRINT("Function %s was already processed. Skipping.", this_name.c_str());
+            return;
+        }
+        visited_functions.insert(this_name);

         if (exclusion_callback && exclusion_callback(this_name))
         {
             // Function is explicitly excluded
@@ -220,14 +272,6 @@ namespace aarch64
         // Asm snippets for patching stack frame
         std::string frame_prologue, frame_epilogue;

-        // Return address reload on exit. This is safer than trying to stuff things into the stack frame since the size is largely just guesswork at this time.
-        std::string x30_tail_restore = fmt::format(
-            "mov x30, #%u;\n"      // Load offset to last gateway exit
-            "add x30, x%u, x30;\n" // Add to base register
-            "ldr x30, [x30];\n",   // Load x30
-            execution_context.hypervisor_context_offset,
-            static_cast<u32>(execution_context.base_register));
-
         if (function_info.stack_frame_size > 0)
         {
             // NOTE: The stack frame here is purely optional, we can pre-allocate scratch on the gateway.
@@ -235,8 +279,12 @@ namespace aarch64
             frame_prologue = fmt::format("sub sp, sp, #%u;", function_info.stack_frame_size);
             frame_epilogue = fmt::format("add sp, sp, #%u;", function_info.stack_frame_size);

-            // Emit the frame prologue
-            LLVM_ASM_0(frame_prologue, irb, f.getContext());
+            // Emit the frame prologue. We use a BB here for extra safety as it solves the problem of backwards jumps re-executing the prologue.
+            auto functionStart = &f.front();
+            auto prologueBB = llvm::BasicBlock::Create(f.getContext(), "", &f, functionStart);
+            irb->SetInsertPoint(prologueBB, prologueBB->begin());
+            LLVM_ASM_VOID(frame_prologue, irb, f.getContext());
+            irb->CreateBr(functionStart);
         }

         // Now we start processing
@@ -259,7 +307,6 @@ namespace aarch64
             if (cf->hasFnAttribute(llvm::Attribute::AlwaysInline) || callee_name.starts_with("llvm."))
             {
                 // Always inlined call. Likely inline Asm. Skip.
-                // log("Function %s will ignore call to intrinsic function %s\n", this_name.c_str(), callee_name.c_str());
                 ++bit;
                 continue;
             }
@@ -278,48 +325,62 @@ namespace aarch64

             if (function_info.stack_frame_size > 0)
             {
-                // 1. Nuke all scratch
-                LLVM_ASM_0(frame_epilogue, irb, f.getContext());
+                // 1. Nuke the local stack frame, if any
+                LLVM_ASM_VOID(frame_epilogue, irb, f.getContext());
             }

-            if (function_info.clobbers_x30)
-            {
-                // 2. Restore the gateway as the current return address
-                LLVM_ASM_0(x30_tail_restore, irb, f.getContext());
-            }
-
-            // 3. We're about to make a tail call. This means after this call, we're supposed to return immediately. In that case, don't link, lower to branch only.
+            // 2. We're about to make a tail call. This means after this call we're supposed to return immediately. In that case, don't link, lower to a branch only.
             // Note that branches have some undesirable side-effects. For one, we lose the argument inputs, which the callee is expecting.
             // This means we burn some cycles on every exit, but in return we do not require one instruction in the prologue + the ret chain is eliminated.
             // No ret-chain also means two BBs can call each other indefinitely without running out of stack, without relying on llvm to optimize that away.

             std::string exit_fn;
             auto ci = ensure(llvm::dyn_cast<llvm::CallInst>(original_inst));
-            auto operand_count = ci->getNumOperands();
+            auto operand_count = ci->getNumOperands() - 1; // The last operand is the callee, not a real operand
             std::vector<std::string> constraints;
             std::vector<llvm::Value*> args;

             // We now load the callee args.
             // FIXME: This is often redundant and wastes cycles; we'll clean this up in a MachineFunction pass later.
-            int base_reg = execution_context.base_register;
+            int args_base_reg = instruction_info.callee_is_GHC ? aarch64::x19 : aarch64::x0; // GHC args are always x19..x25
             for (unsigned i = 0; i < operand_count; ++i)
             {
                 args.push_back(ci->getOperand(i));
-                exit_fn += fmt::format("mov x%d, $%u;\n", base_reg++, i);
+                exit_fn += fmt::format("mov x%d, $%u;\n", args_base_reg++, i);
                 constraints.push_back("r");
             }

-            std::copy(ci->operands().begin(), ci->operands().end(), args.begin());
+            auto context_base_reg = get_base_register_for_call(instruction_info.callee_name);
+            if (!instruction_info.callee_is_GHC)
+            {
+                // For non-GHC calls, we have to remap the arguments to x0...
+                context_base_reg = static_cast<gpr>(context_base_reg - 19);
+            }
+
+            if (function_info.clobbers_x30)
+            {
+                // 3. Restore the exit gate as the current return address.
+                // We want to do this after loading the arguments in case there was any spilling involved.
+                DPRINT("Patching call from %s to %s on register %d...",
+                    this_name.c_str(),
+                    instruction_info.callee_name.c_str(),
+                    static_cast<int>(context_base_reg));
+
+                const auto x30_tail_restore = fmt::format(
+                    "ldr x30, [x%u, #%u];\n", // Load x30 from thread context
+                    static_cast<u32>(context_base_reg),
+                    execution_context.hypervisor_context_offset);
+
+                exit_fn += x30_tail_restore;
+            }

             auto target = ensure(ci->getCalledOperand());
             args.push_back(target);

-            if (ci->isIndirectCall())
+            if (instruction_info.is_indirect)
             {
                 constraints.push_back("r");
-                exit_fn += fmt::format(
-                    "mov x15, $%u;\n"
-                    "br x15",
-                    operand_count);
+                exit_fn += fmt::format("br $%u;\n", operand_count);
             }
             else
             {
@@ -328,7 +389,7 @@ namespace aarch64
             }

             // Emit the branch
-            LLVM_ASM(exit_fn, args, join_strings(constraints, ","), irb, f.getContext());
+            llvm_asm(irb, exit_fn, args, join_strings(constraints, ","), f.getContext());

             // Delete original call instruction
             bit = ci->eraseFromParent();
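Taken together, run() rewrites every GHC tail call into a raw asm exit. A rough sketch of the emitted sequence for a two-argument GHC-to-GHC call (register numbers, offset, and callee name are illustrative):

    mov x19, $0          ; reload the GHC argument registers from the IR operands
    mov x20, $1
    ldr x30, [x20, #OFF] ; clobbers_x30 case: reload the gateway exit address (OFF = hv_ctx offset)
    b __0x12345678       ; branch without linking - the ret chain is eliminated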
@@ -39,14 +39,16 @@ namespace aarch64
         bool is_returning;  // This instruction "returns" to the next instruction (typically just llvm::CallInst*)
         bool callee_is_GHC; // The other function is GHC
         bool is_tail_call;  // Tail call. Assume it is an exit/terminator.
+        bool is_indirect;   // Indirect call. Target is the first operand.
         llvm::Function* callee;  // Callee, if any
         std::string callee_name; // Name of the callee.
     };
 protected:
     std::unordered_set<std::string> visited_functions;

     struct
     {
         gpr base_register;
+        std::vector<std::pair<std::string, gpr>> base_register_lookup;
         u32 hypervisor_context_offset;
     } execution_context;

@@ -57,11 +59,13 @@ namespace aarch64
     function_info_t preprocess_function(llvm::Function& f);

     instruction_info_t decode_instruction(llvm::Function& f, llvm::Instruction* i);

+    gpr get_base_register_for_call(const std::string& callee_name);
 public:

     GHC_frame_preservation_pass(
         gpr base_reg,
         u32 hv_ctx_offset,
+        const std::vector<std::pair<std::string, gpr>>& base_register_lookup = {},
         std::function<bool(const std::string&)> exclusion_callback = {});
     ~GHC_frame_preservation_pass() = default;
@@ -3089,6 +3089,9 @@ protected:

     void initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine);

+    // Run intrinsics replacement pass
+    void replace_intrinsics(llvm::Function&);
+
 public:
     // Register a transformation pass to be run before final compilation by llvm
     void register_transform_pass(std::unique_ptr<translator_pass>& pass)

@@ -3797,9 +3800,6 @@ public:
         }
     }

-    // Run intrinsics replacement pass
-    void replace_intrinsics(llvm::Function&);
-
     // Finalize processing
     void run_transforms(llvm::Function&);
@@ -3935,25 +3935,39 @@ llvm::InlineAsm* compile_inline_asm(
 }

+// Helper for ASM generation with a dynamic number of arguments
+static inline
+llvm::CallInst* llvm_asm(
+    llvm::IRBuilder<>* irb,
+    std::string& asm_,
+    llvm::ArrayRef<llvm::Value*> args,
+    const std::string& constraints,
+    llvm::LLVMContext& context)
+{
+    llvm::ArrayRef<llvm::Type*> types_ref = std::nullopt;
+    std::vector<llvm::Type*> types;
+    types.reserve(args.size());
+
+    if (!args.empty())
+    {
+        for (const auto& arg : args)
+        {
+            types.push_back(arg->getType());
+        }
+        types_ref = types;
+    }
+
+    auto return_type = llvm::Type::getVoidTy(context);
+    auto callee = compile_inline_asm(return_type, types_ref, asm_, constraints);
+    auto c = irb->CreateCall(callee, args);
+    c->addFnAttr(llvm::Attribute::AlwaysInline);
+    return c;
+}
+
 #define LLVM_ASM(asm_, args, constraints, irb, ctx)\
-    do {\
-        std::vector<llvm::Type*> _argTypes;\
-        _argTypes.reserve(args.size());\
-        for (const auto& _arg : args) _argTypes.push_back(_arg->getType());\
-        auto _returnType = llvm::Type::getVoidTy(ctx);\
-        llvm::FunctionCallee _callee = compile_inline_asm(_returnType, _argTypes, asm_, constraints);\
-        auto _c = irb->CreateCall(_callee, args);\
-        _c->addFnAttr(llvm::Attribute::AlwaysInline);\
-    } while(0)
+    llvm_asm(irb, asm_, args, constraints, ctx)

 // Helper for ASM generation with 0 args
-#define LLVM_ASM_0(asm_, irb, ctx)\
-    do {\
-        const auto _voidTy = llvm::Type::getVoidTy(ctx);\
-        auto _callee = compile_inline_asm(_voidTy, std::nullopt, asm_, "");\
-        auto _c = irb->CreateCall(_callee);\
-        _c->setTailCall();\
-        _c->addFnAttr(llvm::Attribute::AlwaysInline);\
-    } while(0)
+#define LLVM_ASM_VOID(asm_, irb, ctx)\
+    llvm_asm(irb, asm_, {}, "", ctx)

 #endif
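A usage sketch for the new llvm_asm helper (assumes an in-scope IRBuilder<>* irb and LLVMContext ctx; the asm string is illustrative):

    std::string epilogue = "add sp, sp, #4096;";
    llvm_asm(irb, epilogue, {}, "", ctx); // void inline-asm call, marked always-inline
    // The retained macros simply forward here, e.g. LLVM_ASM_VOID(epilogue, irb, ctx);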
@@ -371,8 +371,16 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
     c.ldr(a64::x24, arm::Mem(gpr_addr_reg, 8));
     c.ldr(a64::x25, arm::Mem(gpr_addr_reg, 16));

-    // GHC frame for the guest. This seems dodgy, but the only thing stored on the stack is actually registers before making calls to C++ code.
-    // Injected stack frames also work, but are not free and are completely unnecessary.
+    // Thread context save. This is needed for PPU because different functions can switch between x19 and x20 for the base register.
+    // We need a different solution to ensure that no matter which version, we get the right value on far return.
+    c.mov(a64::x26, ppu_t_base);
+
+    // Save thread pointer to stack. SP is the only register preserved across GHC calls.
+    c.sub(a64::sp, a64::sp, Imm(16));
+    c.str(a64::x20, arm::Mem(a64::sp));
+
+    // GHC scratchpad mem. If managed correctly (i.e. no returns ever), GHC functions should never require a stack frame.
+    // We allocate a slab to use for all functions as they tail-call into each other.
     c.sub(a64::sp, a64::sp, Imm(4096));

     // Execute LLE call
@@ -381,11 +389,14 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
     // Return address after far jump. Reset sp and start unwinding...
     c.bind(hv_ctx_pc);

-    // Execution guard undo (unneeded since we're going to hard-reset the SP)
-    //c.add(a64::sp, a64::sp, Imm(4096));
+    // Clear scratchpad allocation
+    c.add(a64::sp, a64::sp, Imm(4096));
+
+    c.ldr(a64::x20, arm::Mem(a64::sp));
+    c.add(a64::sp, a64::sp, Imm(16));

     // We either got here through a normal "ret", which keeps our x20 intact, or we jumped here and the escape reset our x20 reg
-    // Either way, x20 contains our thread base and we forcefully reset the stack pointer
+    // Either way, x26 contains our thread base and we forcefully reset the stack pointer
     c.add(a64::x14, a64::x20, Imm(hv_register_array_offset)); // Per-thread context save

     c.ldr(a64::x15, arm::Mem(a64::x14, 8));
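The gateway's stack discipline after this change, sketched relative to SP at entry to the guest (layout inferred from the asmjit code above):

    sp_entry - 16        : saved x20 (ppu_thread*), reloaded on the far-return path
    sp_entry - 16 - 4096 : shared GHC scratch slab used by all guest functions
    exit path            : add sp, #4096 ; ldr x20, [sp] ; add sp, #16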
@@ -36,9 +36,21 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo

     // Initialize transform passes
 #ifdef ARCH_ARM64
-    std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
-        aarch64::x20, ::offset32(&ppu_thread::hv_ctx));
+    // Base reg table definition
+    // Assume all functions named __0x... are PPU functions and take m_exec as the first arg
+    std::vector<std::pair<std::string, aarch64::gpr>> base_reg_lookup = {
+        { "__0x", aarch64::x20 },       // PPU blocks
+        { "__indirect", aarch64::x20 }, // Indirect jumps
+        { "ppu_", aarch64::x19 },       // Fixed JIT helpers (e.g. ppu_gateway)
+        { "__", aarch64::x19 }          // Probably link table entries
+    };
+
+    // Create transform pass
+    std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
+        ::offset32(&ppu_thread::hv_ctx),
+        base_reg_lookup);

     // Register it
     register_transform_pass(ghc_fixup_pass);
 #endif
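Since get_base_register_for_call returns the first matching prefix, the order of this table matters: the specific "__0x" and "__indirect" entries must stay ahead of the catch-all "__" entry, or PPU blocks would resolve to x19.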

@@ -282,7 +294,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
         }
     }

-    replace_intrinsics(*m_function);
+    run_transforms(*m_function);
     return m_function;
 }

@@ -334,7 +346,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
     {
         // Possible special case for no functions (allowing the do-while optimization)
         m_ir->CreateRetVoid();
-        replace_intrinsics(*m_function);
+        run_transforms(*m_function);
         return m_function;
     }

@@ -392,7 +404,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)

     m_ir->CreateRetVoid();

-    replace_intrinsics(*m_function);
+    run_transforms(*m_function);
     return m_function;
 }

@@ -5357,7 +5369,7 @@ void PPUTranslator::build_interpreter()
         this->i(op); \
         FlushRegisters(); \
         m_ir->CreateRetVoid(); \
-        replace_intrinsics(*m_function); \
+        run_transforms(*m_function); \
     }

     BUILD_VEC_INST(VADDCUW);