PPU/LLVM: Simplify trace management

This commit is contained in:
Vincent Lejeune 2015-08-23 17:50:16 +02:00 committed by Nekotekina
parent f2c8db75bf
commit 02a1bffc12
2 changed files with 159 additions and 528 deletions

View File

@ -75,7 +75,7 @@ Compiler::~Compiler() {
delete m_llvm_context;
}
std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits) {
std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::string & name, u32 start_address, u32 instruction_count, bool generate_linkable_exits) {
auto compilation_start = std::chrono::high_resolution_clock::now();
m_module = new llvm::Module("Module", *m_llvm_context);
@ -118,7 +118,6 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
fpm->add(createCFGSimplificationPass());
fpm->doInitialization();
m_state.cfg = &cfg;
m_state.generate_linkable_exits = generate_linkable_exits;
// Create the function
@ -132,34 +131,32 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
// Create the entry block and add code to branch to the first instruction
m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0));
m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address));
m_ir_builder->CreateBr(GetBasicBlockFromAddress(start_address));
// Used to decode instructions
PPUDisAsm dis_asm(CPUDisAsm_DumpMode);
dis_asm.offset = vm::get_ptr<u8>(cfg.start_address);
dis_asm.offset = vm::get_ptr<u8>(start_address);
m_recompilation_engine.Log() << "Recompiling block :\n\n";
// Convert each instruction in the CFG to LLVM IR
std::vector<PHINode *> exit_instr_list;
for (u32 instr_i : cfg.instruction_addresses) {
for (u32 instructionAddress = start_address; instructionAddress < start_address + instruction_count * 4; instructionAddress += 4) {
m_state.hit_branch_instruction = false;
m_state.current_instruction_address = instr_i;
BasicBlock *instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address);
m_state.current_instruction_address = instructionAddress;
BasicBlock *instr_bb = GetBasicBlockFromAddress(instructionAddress);
m_ir_builder->SetInsertPoint(instr_bb);
if (instr_bb->empty()) {
u32 instr = vm::ps3::read32(m_state.current_instruction_address);
u32 instr = vm::ps3::read32(instructionAddress);
// Dump PPU opcode
dis_asm.dump_pc = m_state.current_instruction_address * 4;
(*PPU_instr::main_list)(&dis_asm, instr);
m_recompilation_engine.Log() << dis_asm.last_opcode;
// Dump PPU opcode
dis_asm.dump_pc = instructionAddress;
(*PPU_instr::main_list)(&dis_asm, instr);
m_recompilation_engine.Log() << dis_asm.last_opcode;
Decode(instr);
if (!m_state.hit_branch_instruction)
m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4));
}
Decode(instr);
if (!m_state.hit_branch_instruction)
m_ir_builder->CreateBr(GetBasicBlockFromAddress(instructionAddress + 4));
}
// Generate exit logic for all empty blocks
@ -179,7 +176,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
if (generate_linkable_exits) {
Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32);
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
Value *ret_i32 = IndirectCall(m_state.current_instruction_address, context_i64, false);
Value *cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0));
BasicBlock *then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_0");
@ -188,7 +185,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
m_ir_builder->SetInsertPoint(then_bb);
context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32);
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb);
@ -215,7 +212,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
m_ir_builder->SetInsertPoint(then_bb);
Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32);
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb);
@ -318,7 +315,7 @@ const Executable *RecompilationEngine::GetExecutable(u32 address, bool isFunctio
return isFunction ? &executeFunc : &executeUntilReturn;
}
bool RecompilationEngine::isAddressCommited(u32 address) const
bool RecompilationEngine::isAddressCommited(u32 address) const
{
size_t offset = address * sizeof(Executable);
size_t page = offset / 4096;
@ -337,7 +334,7 @@ void RecompilationEngine::commitAddress(u32 address)
FunctionCachePagesCommited[page >> 3] |= (1 << (page & 7));
}
const Executable RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address)
const Executable RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address)
{
std::lock_guard<std::mutex> lock(m_address_to_function_lock);
if (!isAddressCommited(address / 4))
@ -353,10 +350,10 @@ const Executable RecompilationEngine::GetCompiledExecutableIfAvailable(u32 addre
return std::get<0>(It->second);
}
void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) {
void RecompilationEngine::NotifyBlockStart(u32 address) {
{
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock);
m_pending_execution_traces.push_back(execution_trace);
std::lock_guard<std::mutex> lock(m_pending_address_start_lock);
m_pending_address_start.push_back(address);
}
if (!joinable()) {
@ -384,22 +381,16 @@ void RecompilationEngine::Task() {
auto start = std::chrono::high_resolution_clock::now();
while (joinable() && !Emu.IsStopped()) {
bool work_done_this_iteration = false;
ExecutionTrace * execution_trace = nullptr;
std::list <u32> m_current_execution_traces;
{
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock);
auto i = m_pending_execution_traces.begin();
if (i != m_pending_execution_traces.end()) {
execution_trace = *i;
m_pending_execution_traces.erase(i);
}
std::lock_guard<std::mutex> lock(m_pending_address_start_lock);
m_current_execution_traces.swap(m_pending_address_start);
}
if (execution_trace) {
ProcessExecutionTrace(*execution_trace);
work_done_this_iteration = true;
delete execution_trace;
if (!m_current_execution_traces.empty()) {
for (u32 address : m_current_execution_traces)
work_done_this_iteration |= ProcessExecutionTrace(address);
}
if (!work_done_this_iteration) {
@ -429,122 +420,116 @@ void RecompilationEngine::Task() {
s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this.
}
void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) {
auto execution_trace_id = execution_trace.GetId();
auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id);
if (processed_execution_trace_i == m_processed_execution_traces.end()) {
Log() << "Trace: " << execution_trace.ToString() << "\n";
// Find the function block
BlockEntry key(execution_trace.function_address, execution_trace.function_address);
auto block_i = m_block_table.find(&key);
if (block_i == m_block_table.end()) {
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address));
}
auto function_block = *block_i;
block_i = m_block_table.end();
auto split_trace = false;
std::vector<BlockEntry *> tmp_block_list;
for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) {
if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) {
block_i = m_block_table.end();
split_trace = true;
}
if (block_i == m_block_table.end()) {
BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address);
block_i = m_block_table.find(&key);
if (block_i == m_block_table.end()) {
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address));
}
tmp_block_list.push_back(*block_i);
}
const ExecutionTraceEntry * next_trace = nullptr;
if (trace_i + 1 != execution_trace.entries.end()) {
next_trace = &(*(trace_i + 1));
}
else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) {
next_trace = &(*(execution_trace.entries.begin()));
}
UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace);
if (*block_i != function_block) {
UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace);
}
}
processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list)));
}
for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) {
if (!(*i)->is_compiled) {
(*i)->num_hits++;
if ((*i)->num_hits >= Ini.LLVMThreshold.GetValue()) {
CompileBlock(*(*i));
}
bool RecompilationEngine::ProcessExecutionTrace(u32 address) {
auto It = m_block_table.find(address);
if (It == m_block_table.end())
It = m_block_table.emplace(address, BlockEntry(address)).first;
BlockEntry &block = It->second;
if (!block.is_compiled) {
block.num_hits++;
if (block.num_hits >= Ini.LLVMThreshold.GetValue()) {
CompileBlock(block);
return true;
}
}
// TODO:: Syphurith: It is said that just remove_if would cause some troubles.. I don't know if that would cause Memleak. From CppCheck:
// The return value of std::remove_if() is ignored. This function returns an iterator to the end of the range containing those elements that should be kept.
// Elements past new end remain valid but with unspecified values. Use the erase method of the container to delete them.
std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; });
return false;
}
void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) {
if (this_entry.type == ExecutionTraceEntry::Type::Instruction) {
cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress());
/**
* This code is inspired from Dolphin PPC Analyst
*/
/// Widen a signed 16-bit value to a signed 32-bit value, keeping its sign.
inline s32 SignExt16(s16 x)
{
	const s32 widened = static_cast<s32>(x);
	return widened;
}
/// Sign-extend a 26-bit quantity held in x.
/// When bit 25 (0x2000000) is set, the six upper bits are filled with ones;
/// otherwise x is returned as is (upper bits are not cleared).
inline s32 SignExt26(u32 x)
{
	if (x & 0x2000000)
		return (s32)(x | 0xFC000000);

	return (s32)(x);
}
if (next_entry) {
if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) {
if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) {
cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress());
bool RecompilationEngine::AnalyseBlock(BlockEntry &functionData, size_t maxSize)
{
u32 startAddress = functionData.address;
u32 farthestBranchTarget = startAddress;
functionData.instructionCount = 0;
functionData.calledFunctions.clear();
functionData.is_analysed = true;
functionData.is_compilable_function = true;
Log() << "Analysing " << (void*)(uint64_t)startAddress << "\n";
for (size_t instructionAddress = startAddress; instructionAddress < startAddress + maxSize; instructionAddress += 4)
{
u32 instr = vm::ps3::read32((u32)instructionAddress);
functionData.instructionCount++;
if (instr == PPU_instr::implicts::BLR() && instructionAddress >= farthestBranchTarget && functionData.is_compilable_function)
{
Log() << "Analysis: Block is compilable into a function \n";
return true;
}
else if (PPU_instr::fields::GD_13(instr) == PPU_opcodes::G_13Opcodes::BCCTR)
{
if (!PPU_instr::fields::LK(instr))
{
Log() << "Analysis: indirect branching found \n";
functionData.is_compilable_function = false;
return true;
}
}
else if (PPU_instr::fields::OPCD(instr) == PPU_opcodes::PPU_MainOpcodes::BC)
{
u32 target = SignExt16(PPU_instr::fields::BD(instr));
if (!PPU_instr::fields::AA(instr)) // AA clear: the displacement is relative to this instruction's address
target += (u32)instructionAddress;
if (target > farthestBranchTarget && !PPU_instr::fields::LK(instr))
farthestBranchTarget = target;
}
else if (PPU_instr::fields::OPCD(instr) == PPU_opcodes::PPU_MainOpcodes::B)
{
u32 target = SignExt26(PPU_instr::fields::LL(instr));
if (!PPU_instr::fields::AA(instr)) // AA clear: the displacement is relative to this instruction's address
target += (u32)instructionAddress;
if (!PPU_instr::fields::LK(instr))
{
if (target < startAddress)
{
Log() << "Analysis: branch to previous block\n";
functionData.is_compilable_function = false;
return true;
}
else if (target > farthestBranchTarget)
farthestBranchTarget = target;
}
else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) {
cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress());
}
}
}
else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) {
if (next_entry) {
if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) {
cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress());
}
else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) {
cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress());
}
else
functionData.calledFunctions.insert(target);
}
}
Log() << "Analysis: maxSize reached \n";
functionData.is_compilable_function = false;
return true;
}
void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
Log() << "Compile: " << block_entry.ToString() << "\n";
Log() << "CFG: " << block_entry.cfg.ToString() << "\n";
if (block_entry.is_analysed)
return;
const std::pair<Executable, llvm::ExecutionEngine *> &compileResult =
m_compiler.Compile(fmt::format("fn_0x%08X", block_entry.cfg.start_address), block_entry.cfg,
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
if (!AnalyseBlock(block_entry))
return;
Log() << "Compile: " << block_entry.ToString() << "\n";
const std::pair<Executable, llvm::ExecutionEngine *> &compileResult =
m_compiler.Compile(fmt::format("fn_0x%08X", block_entry.address), block_entry.address, block_entry.instructionCount, false /*generate_linkable_exits*/);
// If entry doesn't exist, create it (using lock)
std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(block_entry.cfg.start_address);
std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(block_entry.address);
if (It == m_address_to_function.end())
{
std::lock_guard<std::mutex> lock(m_address_to_function_lock);
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr;
if (!isAddressCommited(block_entry.cfg.start_address / 4))
commitAddress(block_entry.cfg.start_address / 4);
std::get<1>(m_address_to_function[block_entry.address]) = nullptr;
if (!isAddressCommited(block_entry.address / 4))
commitAddress(block_entry.address / 4);
}
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = std::unique_ptr<llvm::ExecutionEngine>(compileResult.second);
std::get<0>(m_address_to_function[block_entry.cfg.start_address]) = compileResult.first;
std::get<3>(m_address_to_function[block_entry.cfg.start_address]) = m_currentId;
Log() << "ID IS " << m_currentId << "\n";
std::get<1>(m_address_to_function[block_entry.address]) = std::unique_ptr<llvm::ExecutionEngine>(compileResult.second);
std::get<0>(m_address_to_function[block_entry.address]) = compileResult.first;
std::get<3>(m_address_to_function[block_entry.address]) = m_currentId;
Log() << "Associating " << (void*)(uint64_t)block_entry.address << " with ID " << m_currentId << "\n";
m_currentId++;
block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize();
block_entry.is_compiled = true;
FunctionCache[block_entry.cfg.start_address / 4] = compileResult.first;
FunctionCache[block_entry.address / 4] = compileResult.first;
}
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
@ -557,86 +542,6 @@ std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
return s_the_instance;
}
/// Construct a tracer bound to the RecompilationEngine returned by GetInstance()
/// and pre-size the trace stack.
Tracer::Tracer()
	: m_recompilation_engine(RecompilationEngine::GetInstance())
{
	// Reserve room up front so the first 100 nested traces do not reallocate the stack.
	const size_t initial_stack_capacity = 100;
	m_stack.reserve(initial_stack_capacity);
}
/// Destroy the tracer.
/// Traces that are still on the stack were allocated with `new` in Trace() and have not been
/// handed to the recompilation engine (only popped or loop traces are passed to NotifyTrace),
/// so this object is their sole owner and must release them here.
Tracer::~Tracer() {
	Terminate();

	for (ExecutionTrace * pending_trace : m_stack) {
		delete pending_trace;
	}
	m_stack.clear();
}
/// Feed one execution event to the tracer.
/// Depending on trace_type this opens a new trace, appends an entry to the trace on top of the
/// stack, or completes a trace. A completed trace is passed to the recompilation engine through
/// NotifyTrace(). The meaning of arg1 and arg2 depends on trace_type (see each branch).
void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) {
	// Set when this event completes a trace that has to be sent to the recompilation engine.
	ExecutionTrace * finished_trace = nullptr;

	// Marks the trace on top of the stack as linear, removes it from the stack and returns it.
	const auto pop_linear_trace = [this]() -> ExecutionTrace * {
		ExecutionTrace * const top = m_stack.back();
		top->type = ExecutionTrace::Type::Linear;
		m_stack.pop_back();
		return top;
	};

	if (trace_type == TraceType::CallFunction) {
		// arg1 is address of the function
		m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1));
	}
	else if (trace_type == TraceType::EnterFunction) {
		// arg1 is address of the function
		m_stack.push_back(new ExecutionTrace(arg1));
	}
	else if (trace_type == TraceType::ExitFromCompiledFunction) {
		// arg1 is address of function.
		// arg2 is the address of the exit instruction; nothing is recorded when it is zero.
		if (arg2) {
			ExecutionTrace * const resumed = new ExecutionTrace(arg1);
			m_stack.push_back(resumed);
			resumed->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2));
		}
	}
	else if (trace_type == TraceType::Return) {
		// No args used
		finished_trace = pop_linear_trace();
	}
	else if (trace_type == TraceType::Instruction) {
		// arg1 is the address of the instruction
		ExecutionTrace * const current = m_stack.back();
		std::vector<ExecutionTraceEntry> & recorded = current->entries;

		// Walk the recorded entries from newest to oldest looking for one that starts at arg1.
		for (size_t remaining = recorded.size(); remaining > 0; --remaining) {
			const size_t index = remaining - 1;
			const ExecutionTraceEntry & candidate = recorded[index];

			const bool same_instruction =
				candidate.type == ExecutionTraceEntry::Type::Instruction && candidate.data.instruction.address == arg1;
			const bool same_block_entry =
				candidate.type == ExecutionTraceEntry::Type::CompiledBlock && candidate.data.compiled_block.entry_address == arg1;

			if (!same_instruction && !same_block_entry) {
				continue;
			}

			// Found a loop: the loop trace gets the entries from the match to the end,
			// and the current trace is cut back so that the match is its last entry.
			finished_trace = new ExecutionTrace(current->function_address);
			finished_trace->type = ExecutionTrace::Type::Loop;
			finished_trace->entries.insert(finished_trace->entries.end(), recorded.begin() + index, recorded.end());
			recorded.erase(recorded.begin() + index + 1, recorded.end());
			break;
		}

		if (finished_trace == nullptr) {
			// A loop was not found
			recorded.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1));
		}
	}
	else if (trace_type == TraceType::ExitFromCompiledBlock) {
		// arg1 is address of the compiled block.
		// arg2 is the address of the exit instruction.
		m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2));

		if (arg2 == 0) {
			// Return from function
			finished_trace = pop_linear_trace();
		}
	}
	else {
		assert(0);
	}

	if (finished_trace != nullptr) {
		m_recompilation_engine->NotifyTrace(finished_trace);
	}
}
/// Called when the execution sequence is being terminated.
/// Currently does nothing.
void Tracer::Terminate()
{
	// TODO: Notify recompilation engine
}
ppu_recompiler_llvm::CPUHybridDecoderRecompiler::CPUHybridDecoderRecompiler(PPUThread & ppu)
: m_ppu(ppu)
, m_interpreter(new PPUInterpreter(ppu))
@ -656,7 +561,6 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::DecodeMemory(const u32 addr
/// Run the function located at the thread's current PC.
/// Reports an EnterFunction event (with the current PC) to the tracer of the thread's decoder,
/// then delegates to ExecuteTillReturn with a zero context. The `context` argument is not used.
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteFunction(PPUThread * ppu_state, u64 context) {
	CPUHybridDecoderRecompiler * const recompiler = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();

	const Tracer::TraceType event = Tracer::TraceType::EnterFunction;
	recompiler->m_tracer.Trace(event, ppu_state->PC, 0);

	return ExecuteTillReturn(ppu_state, 0);
}
@ -687,21 +591,19 @@ static BranchType GetBranchTypeFromInstruction(u32 instruction)
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread * ppu_state, u64 context) {
CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();
if (context)
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF);
execution_engine->m_recompilation_engine->NotifyBlockStart(ppu_state->PC);
while (PollStatus(ppu_state) == false) {
while (PollStatus(ppu_state) == false) {
const Executable executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC);
if (executable)
{
auto entry = ppu_state->PC;
u32 exit = (u32)executable(ppu_state, 0);
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit);
if (exit == 0)
return 0;
continue;
execution_engine->m_recompilation_engine->NotifyBlockStart(ppu_state->PC);
continue;
}
execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0);
u32 instruction = vm::ps3::read32(ppu_state->PC);
u32 oldPC = ppu_state->PC;
execution_engine->m_decoder.Decode(instruction);
@ -710,11 +612,9 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
switch (branch_type) {
case BranchType::Return:
execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0);
if (Emu.GetCPUThreadStop() == ppu_state->PC) ppu_state->fast_stop();
return 0;
case BranchType::FunctionCall: {
execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0);
ExecuteFunction(ppu_state, 0);
break;
}

View File

@ -26,233 +26,9 @@
namespace ppu_recompiler_llvm {
class Compiler;
class RecompilationEngine;
class Tracer;
class ExecutionEngine;
struct PPUState;
/// A single step recorded in an execution trace.
struct ExecutionTraceEntry {
	/// Payload of the entry; `type` tells which member is the valid one.
	union {
		struct Instruction {
			u32 address;
		} instruction;

		struct FunctionCall {
			u32 address;
		} function_call;

		struct CompiledBlock {
			u32 entry_address;
			u32 exit_address;
		} compiled_block;
	} data;

	/// Discriminator selecting the active member of `data`.
	enum class Type {
		FunctionCall,
		Instruction,
		CompiledBlock,
	} type;

	/// Build an entry of the given kind.
	/// arg1 is stored as the instruction address, the function address or the block entry
	/// address; arg2 is only stored for compiled blocks, as the exit address.
	ExecutionTraceEntry(Type type, u32 arg1, u32 arg2 = 0)
		: type(type) {
		if (type == Type::Instruction) {
			data.instruction.address = arg1;
		}
		else if (type == Type::FunctionCall) {
			data.function_call.address = arg1;
		}
		else if (type == Type::CompiledBlock) {
			data.compiled_block.entry_address = arg1;
			data.compiled_block.exit_address = arg2;
		}
		else {
			assert(0);
		}
	}

	/// Address that identifies the entry: the instruction address, the called function
	/// address, or the entry address of the compiled block.
	u32 GetPrimaryAddress() const {
		if (type == Type::Instruction) {
			return data.instruction.address;
		}
		if (type == Type::FunctionCall) {
			return data.function_call.address;
		}
		if (type == Type::CompiledBlock) {
			return data.compiled_block.entry_address;
		}

		assert(0);
		return 0;
	}

	/// Short textual form: "I:", "F:" or "C:" followed by the address(es) in hexadecimal.
	std::string ToString() const {
		if (type == Type::Instruction) {
			return fmt::format("I:0x%08X", data.instruction.address);
		}
		if (type == Type::FunctionCall) {
			return fmt::format("F:0x%08X", data.function_call.address);
		}
		if (type == Type::CompiledBlock) {
			return fmt::format("C:0x%08X-0x%08X", data.compiled_block.entry_address, data.compiled_block.exit_address);
		}

		assert(0);
		return "";
	}

	/// 64-bit hash of the entry.
	/// Instruction and function call entries combine the type (upper 32 bits) with the address;
	/// compiled block entries combine the exit address (upper 32 bits) with the entry address.
	u64 hash() const {
		const u64 type_bits = ((u64)type << 32);

		if (type == Type::Instruction) {
			return type_bits | data.instruction.address;
		}
		if (type == Type::FunctionCall) {
			return type_bits | data.function_call.address;
		}
		if (type == Type::CompiledBlock) {
			u64 block_hash = data.compiled_block.exit_address;
			block_hash <<= 32;
			block_hash |= data.compiled_block.entry_address;
			return block_hash;
		}

		assert(0);
		return type_bits;
	}
};
/// An execution trace.
struct ExecutionTrace {
/// Unique id of an execution trace;
typedef u64 Id;
/// The function to which this trace belongs
u32 function_address;
/// Execution trace type
enum class Type {
Linear,
Loop,
} type;
/// entries in the trace
std::vector<ExecutionTraceEntry> entries;
ExecutionTrace(u32 address)
: function_address(address) {
}
std::string ToString() const {
auto s = fmt::format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? "Loop" : "Linear");
for (auto i = 0; i < entries.size(); i++) {
s += " " + entries[i].ToString();
}
return s;
}
Id GetId() const {
Id id = 0;
for (auto i = entries.begin(); i != entries.end(); i++) {
id ^= i->hash();
id <<= 1;
}
return id;
}
};
/// A control flow graph
struct ControlFlowGraph {
/// Address of the first instruction
u32 start_address;
/// Address of the function to which this CFG belongs to
u32 function_address;
/// Set of addresses of the instructions in the CFG
std::set<u32> instruction_addresses;
/// Branches in the CFG.
/// Key is the address of an instruction
/// Data is the set of all instructions to which this instruction branches to.
std::map<u32, std::set<u32>> branches;
/// Function calls in the CFG
/// Key is the address of an instruction
/// Data is the set of all functions which this instruction invokes.
std::map<u32, std::set<u32>> calls;
ControlFlowGraph(u32 start_address, u32 function_address)
: start_address(start_address)
, function_address(function_address) {
}
void operator += (const ControlFlowGraph & other) {
for (auto i = other.instruction_addresses.begin(); i != other.instruction_addresses.end(); i++) {
instruction_addresses.insert(*i);
}
for (auto i = other.branches.begin(); i != other.branches.end(); i++) {
auto j = branches.find(i->first);
if (j == branches.end()) {
j = branches.insert(branches.begin(), std::make_pair(i->first, std::set<u32>()));
}
for (auto k = i->second.begin(); k != i->second.end(); k++) {
j->second.insert(*k);
}
}
for (auto i = other.calls.begin(); i != other.calls.end(); i++) {
auto j = calls.find(i->first);
if (j == calls.end()) {
j = calls.insert(calls.begin(), std::make_pair(i->first, std::set<u32>()));
}
for (auto k = i->second.begin(); k != i->second.end(); k++) {
j->second.insert(*k);
}
}
}
std::string ToString() const {
auto s = fmt::format("0x%08X (0x%08X): Size=%u ->", start_address, function_address, GetSize());
for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) {
s += fmt::format(" 0x%08X", *i);
}
s += "\nBranches:";
for (auto i = branches.begin(); i != branches.end(); i++) {
s += fmt::format("\n0x%08X ->", i->first);
for (auto j = i->second.begin(); j != i->second.end(); j++) {
s += fmt::format(" 0x%08X", *j);
}
}
s += "\nCalls:";
for (auto i = calls.begin(); i != calls.end(); i++) {
s += fmt::format("\n0x%08X ->", i->first);
for (auto j = i->second.begin(); j != i->second.end(); j++) {
s += fmt::format(" 0x%08X", *j);
}
}
return s;
}
/// Get the size of the CFG. The size is a score of how large the CFG is and increases everytime
/// a node or an edge is added to the CFG.
size_t GetSize() const {
return instruction_addresses.size() + branches.size() + calls.size();
}
};
enum class BranchType {
NonBranch,
LocalBranch,
@ -295,7 +71,7 @@ namespace ppu_recompiler_llvm {
* Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it
* Pointer to function can be retrieved with getPointerToFunction
*/
std::pair<Executable, llvm::ExecutionEngine *> Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits);
std::pair<Executable, llvm::ExecutionEngine *> Compile(const std::string & name, u32 start_address, u32 instruction_count, bool generate_linkable_exits);
/// Retrieve compiler stats
Stats GetStats();
@ -723,9 +499,6 @@ namespace ppu_recompiler_llvm {
/// Args of the LLVM function
llvm::Value * args[MaxArgs];
/// The CFG being compiled
const ControlFlowGraph * cfg;
/// Address of the current instruction being compiled
u32 current_instruction_address;
@ -1021,8 +794,8 @@ namespace ppu_recompiler_llvm {
**/
const Executable GetCompiledExecutableIfAvailable(u32 address);
/// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace.
void NotifyTrace(ExecutionTrace * execution_trace);
/// Notify the recompilation engine about a newly detected block start.
void NotifyBlockStart(u32 address);
/// Log
llvm::raw_fd_ostream & Log();
@ -1035,65 +808,58 @@ namespace ppu_recompiler_llvm {
private:
/// An entry in the block table
struct BlockEntry {
/// Start address
u32 address;
/// Number of times this block was hit
u32 num_hits;
/// Size of the CFG when it was last compiled
size_t last_compiled_cfg_size;
/// The CFG for this block
ControlFlowGraph cfg;
/// Indicates whether this function has been analysed or not
bool is_analysed;
/// Indicates whether the block has been compiled or not
bool is_compiled;
BlockEntry(u32 start_address, u32 function_address)
/// Indicates whether the block is a function that can be completely compiled,
/// that is, one that has clear "return" semantics and no indirect branch
bool is_compilable_function;
/// If the analysis was successful, the number of instructions in the block.
u32 instructionCount;
/// If the analysis was successful, the addresses of the functions the block calls.
std::set<u32> calledFunctions;
BlockEntry(u32 start_address)
: num_hits(0)
, last_compiled_cfg_size(0)
, address(start_address)
, is_compiled(false)
, cfg(start_address, function_address) {
, is_analysed(false)
, is_compilable_function(false)
, instructionCount(0) {
}
std::string ToString() const {
return fmt::format("0x%08X (0x%08X): NumHits=%u, LastCompiledCfgSize=%u, IsCompiled=%c",
cfg.start_address, cfg.function_address, num_hits, last_compiled_cfg_size, is_compiled ? 'Y' : 'N');
return fmt::format("0x%08X: NumHits=%u, IsCompiled=%c",
address, num_hits, is_compiled ? 'Y' : 'N');
}
bool operator == (const BlockEntry & other) const {
return cfg.start_address == other.cfg.start_address;
return address == other.address;
}
bool IsFunction() const {
return cfg.function_address == cfg.start_address;
}
struct hash {
size_t operator()(const BlockEntry * e) const {
return e->cfg.start_address;
}
};
struct equal_to {
bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const {
return *lhs == *rhs;
}
};
};
/// Log
llvm::raw_fd_ostream * m_log;
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue.
std::mutex m_pending_execution_traces_lock;
/// Lock for accessing m_pending_address_start. TODO: Eliminate this and use a lock-free queue.
std::mutex m_pending_address_start_lock;
/// Queue of execution traces pending processing
std::list<ExecutionTrace *> m_pending_execution_traces;
/// Queue of block start address to process
std::list<u32> m_pending_address_start;
/// Block table
std::unordered_set<BlockEntry *, BlockEntry::hash, BlockEntry::equal_to> m_block_table;
/// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes.
std::unordered_map<ExecutionTrace::Id, std::vector<BlockEntry *>> m_processed_execution_traces;
std::unordered_map<u32, BlockEntry> m_block_table;
/// Lock for accessing m_address_to_function.
std::mutex m_address_to_function_lock;
@ -1131,10 +897,15 @@ namespace ppu_recompiler_llvm {
RecompilationEngine & operator = (RecompilationEngine && other) = delete;
/// Process an execution trace.
void ProcessExecutionTrace(const ExecutionTrace & execution_trace);
/// Returns true if a block was compiled
bool ProcessExecutionTrace(u32);
/// Update a CFG
void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry);
/**
* Analyse block to get useful info (function called, has indirect branch...)
* This code is inspired from Dolphin PPC Analyst
* Return true if analysis is successful.
*/
bool AnalyseBlock(BlockEntry &functionData, size_t maxSize = 10000);
/// Compile a block
void CompileBlock(BlockEntry & block_entry);
@ -1146,43 +917,6 @@ namespace ppu_recompiler_llvm {
static std::shared_ptr<RecompilationEngine> s_the_instance;
};
/// Finds interesting execution sequences.
/// Events are reported through Trace(); the traces built from them are kept on an internal stack.
class Tracer {
public:
	/// Kind of event reported to Trace()
	enum class TraceType : u32 {
		CallFunction,
		EnterFunction,
		ExitFromCompiledFunction,
		Return,
		Instruction,
		ExitFromCompiledBlock,
	};

	Tracer();
	virtual ~Tracer();

	// A tracer can be neither copied nor moved.
	Tracer(const Tracer & other) = delete;
	Tracer & operator = (const Tracer & other) = delete;
	Tracer(Tracer && other) = delete;
	Tracer & operator = (Tracer && other) = delete;

	/// Report an event to the tracer. The meaning of arg1 and arg2 depends on trace_type.
	void Trace(TraceType trace_type, u32 arg1, u32 arg2);

	/// Report that the execution sequence is being terminated.
	void Terminate();

private:
	/// Call stack of the traces currently being recorded
	std::vector<ExecutionTrace *> m_stack;

	/// Recompilation engine
	std::shared_ptr<RecompilationEngine> m_recompilation_engine;
};
/**
* PPU execution engine
* Relies on PPUInterpreter1 to execute uncompiled code.
@ -1216,9 +950,6 @@ namespace ppu_recompiler_llvm {
/// PPU instruction Decoder
PPUDecoder m_decoder;
/// Execution tracer
Tracer m_tracer;
/// Recompilation engine
std::shared_ptr<RecompilationEngine> m_recompilation_engine;