PPU LLVM: paradigm shift

For now, compile only one block at a time
Use tail calls to move between blocks
Fully write PPU context (except CIA)

This fixes many compatibility problems
This commit is contained in:
Nekotekina 2017-06-23 00:52:09 +03:00 committed by Ivan
parent a29d7d3962
commit aea094730b
13 changed files with 1076 additions and 856 deletions

View File

@ -5,6 +5,7 @@
#include <unordered_set>
#include <set>
#include <array>
#include <deque>
#include "types.h"
#include "StrFmt.h"
@ -20,6 +21,7 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
@ -50,11 +52,13 @@ static void* const s_memory = []() -> void*
return utils::memory_reserve(s_memory_size);
}();
static void* s_next;
// Code section
static u8* s_code_addr;
static u64 s_code_size;
#ifdef _WIN32
static std::deque<std::vector<RUNTIME_FUNCTION>> s_unwater;
static std::vector<std::vector<RUNTIME_FUNCTION>> s_unwind; // .pdata
#endif
@ -67,9 +71,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
MemoryManager(std::unordered_map<std::string, std::uintptr_t>& table)
: m_link(table)
, m_next(s_memory)
, m_tramps(nullptr)
{
s_next = s_memory;
}
[[noreturn]] static void null()
@ -77,7 +81,7 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
fmt::throw_exception("Null function" HERE);
}
virtual u64 getSymbolAddress(const std::string& name) override
llvm::JITSymbol findSymbol(const std::string& name) override
{
auto& addr = m_link[name];
@ -92,7 +96,6 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
}
else
{
// It's fine if some function is never called, for example.
LOG_ERROR(GENERAL, "LLVM: Linkage failed: %s", name);
addr = (u64)null;
}
@ -104,9 +107,9 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
// Allocate memory for trampolines
if (!m_tramps)
{
m_tramps = reinterpret_cast<decltype(m_tramps)>(m_next);
utils::memory_commit(m_next, 4096, utils::protection::wx);
m_next = (u8*)((u64)m_next + 4096);
m_tramps = reinterpret_cast<decltype(m_tramps)>(s_next);
utils::memory_commit(s_next, 4096, utils::protection::wx);
s_next = (u8*)((u64)s_next + 4096);
}
// Create a trampoline
@ -129,13 +132,13 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
}
}
return addr;
return {addr, llvm::JITSymbolFlags::Exported};
}
virtual u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
{
// Simple allocation
const u64 next = ::align((u64)m_next + size, 4096);
const u64 next = ::align((u64)s_next + size, 4096);
if (next > (u64)s_memory + s_memory_size)
{
@ -143,18 +146,17 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
return nullptr;
}
utils::memory_commit(m_next, size, utils::protection::wx);
s_code_addr = (u8*)m_next;
s_code_size = size;
utils::memory_commit(s_next, size, utils::protection::wx);
s_code_addr = (u8*)s_next;
LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), m_next, size, align);
return (u8*)std::exchange(m_next, (void*)next);
LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), s_next, size, align);
return (u8*)std::exchange(s_next, (void*)next);
}
virtual u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
{
// Simple allocation
const u64 next = ::align((u64)m_next + size, 4096);
const u64 next = ::align((u64)s_next + size, 4096);
if (next > (u64)s_memory + s_memory_size)
{
@ -167,10 +169,10 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
LOG_ERROR(GENERAL, "LLVM: Writeable data section not supported!");
}
utils::memory_commit(m_next, size);
utils::memory_commit(s_next, size);
LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), m_next, size, align, is_ro ? "ro" : "rw");
return (u8*)std::exchange(m_next, (void*)next);
LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), s_next, size, align, is_ro ? "ro" : "rw");
return (u8*)std::exchange(s_next, (void*)next);
}
virtual bool finalizeMemory(std::string* = nullptr) override
@ -191,17 +193,15 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
{
#ifdef _WIN32
// Use s_memory as a BASE, compute the difference
const u64 code_diff = (u64)s_code_addr - (u64)s_memory;
const u64 unwind_diff = (u64)addr - (u64)s_memory;
// Fix RUNTIME_FUNCTION records (.pdata section)
auto& pdata = s_unwind.back();
auto pdata = std::move(s_unwater.front());
s_unwater.pop_front();
for (auto& rf : pdata)
{
rf.BeginAddress += static_cast<DWORD>(code_diff);
rf.EndAddress += static_cast<DWORD>(code_diff);
rf.UnwindData += static_cast<DWORD>(unwind_diff);
rf.UnwindData += static_cast<DWORD>(unwind_diff);
}
// Register .xdata UNWIND_INFO structs
@ -209,6 +209,10 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
{
LOG_ERROR(GENERAL, "RtlAddFunctionTable() failed! Error %u", GetLastError());
}
else
{
s_unwind.emplace_back(std::move(pdata));
}
#endif
return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
@ -239,24 +243,13 @@ struct MemoryManager final : llvm::RTDyldMemoryManager
utils::memory_decommit(s_memory, s_memory_size);
}
private:
void* m_next;
};
// Helper class
struct EventListener final : llvm::JITEventListener
{
std::string path;
virtual void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
if (!path.empty())
{
const llvm::StringRef elf = obj.getData();
fs::file(path, fs::rewrite).write(elf.data(), elf.size());
}
#ifdef _WIN32
for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it)
{
@ -282,7 +275,17 @@ struct EventListener final : llvm::JITEventListener
}
}
s_unwind.emplace_back(std::move(rfs));
// Use s_memory as a BASE, compute the difference
const u64 code_diff = (u64)s_code_addr - (u64)s_memory;
// Fix RUNTIME_FUNCTION records (.pdata section)
for (auto& rf : rfs)
{
rf.BeginAddress += static_cast<DWORD>(code_diff);
rf.EndAddress += static_cast<DWORD>(code_diff);
}
s_unwater.emplace_back(std::move(rfs));
}
}
#endif
@ -291,6 +294,46 @@ struct EventListener final : llvm::JITEventListener
static EventListener s_listener;
// Helper class
class ObjectCache final : public llvm::ObjectCache
{
	// Prefix every cache file name starts with (held by reference, not copied)
	const std::string& m_path;

	// Cache file name for a module: prefix followed by the module name
	std::string file_name(const llvm::Module* module) const
	{
		std::string result = m_path;
		result.append(module->getName());
		return result;
	}

public:
	ObjectCache(const std::string& path)
		: m_path(path)
	{
	}

	~ObjectCache() override = default;

	// Called with a freshly compiled object: write it out under the module's cache file name
	void notifyObjectCompiled(const llvm::Module* module, llvm::MemoryBufferRef obj) override
	{
		const std::string target = file_name(module);
		fs::file(target, fs::rewrite).write(obj.getBufferStart(), obj.getBufferSize());
		LOG_SUCCESS(GENERAL, "LLVM: Created module: %s", module->getName().data());
	}

	// Return the cached object for the module, or nullptr when its cache file cannot be opened
	std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* module) override
	{
		const std::string source = file_name(module);
		fs::file cached{source, fs::read};

		if (!cached)
		{
			return nullptr;
		}

		// Read the whole file into a buffer sized to match it
		auto buf = llvm::MemoryBuffer::getNewUninitMemBuffer(cached.size());
		cached.read(const_cast<char*>(buf->getBufferStart()), buf->getBufferSize());
		LOG_SUCCESS(GENERAL, "LLVM: Loaded module: %s", module->getName().data());
		return buf;
	}
};
jit_compiler::jit_compiler(std::unordered_map<std::string, std::uintptr_t> init_linkage_info, std::string _cpu)
: m_link(std::move(init_linkage_info))
, m_cpu(std::move(_cpu))
@ -321,58 +364,54 @@ jit_compiler::jit_compiler(std::unordered_map<std::string, std::uintptr_t> init_
}
m_engine->RegisterJITEventListener(&s_listener);
LOG_SUCCESS(GENERAL, "LLVM: JIT initialized (%s)", m_cpu);
}
void jit_compiler::load(std::unique_ptr<llvm::Module> module, std::unique_ptr<llvm::object::ObjectFile> object)
void jit_compiler::add(std::unique_ptr<llvm::Module> module, const std::string& path)
{
s_listener.path.clear();
auto* module_ptr = module.get();
ObjectCache cache{path};
m_engine->setObjectCache(&cache);
const auto ptr = module.get();
m_engine->addModule(std::move(module));
m_engine->addObjectFile(std::move(object));
m_engine->finalizeObject();
m_engine->generateCodeForModule(ptr);
m_engine->setObjectCache(nullptr);
m_map.clear();
for (auto& func : module_ptr->functions())
for (auto& func : ptr->functions())
{
const std::string& name = func.getName();
if (!m_link.count(name))
{
// Register compiled function
m_map[name] = m_engine->getFunctionAddress(name);
}
}
}
void jit_compiler::make(std::unique_ptr<llvm::Module> module, std::string path)
{
	// Keep a raw handle; ownership of the module moves into the engine below
	auto* const raw_module = module.get();

	// Pass the path on to the shared event listener
	s_listener.path = std::move(path);

	m_engine->addModule(std::move(module));
	m_engine->finalizeObject();

	// Rebuild the name -> address map from scratch
	m_map.clear();

	for (auto& fn : raw_module->functions())
	{
		// Only functions that have a body are registered
		if (!fn.empty())
		{
			const std::string& fn_name = fn.getName();
			m_map[fn_name] = m_engine->getFunctionAddress(fn_name);
		}

		// Drop the IR body in every case to lower memory consumption
		fn.deleteBody();
	}
}
// Finalize: forwards to the execution engine's finalizeObject().
// The `path` argument is not used by this body.
void jit_compiler::fin(const std::string& path)
{
m_engine->finalizeObject();
}
void jit_compiler::add(std::unordered_map<std::string, std::string> data)
{
	// Total footprint: each blob occupies a slot rounded up to 16 bytes
	std::size_t total = 0;

	for (const auto& entry : data)
	{
		total += ::align(entry.second.size(), 16);
	}

	// Commit the region at s_next as writable+executable and pre-fill whole pages with 0xC3 bytes
	utils::memory_commit(s_next, total, utils::protection::wx);
	std::memset(s_next, 0xc3, ::align(total, 4096));

	for (const auto& entry : data)
	{
		const std::string& blob = entry.second;

		// Copy the blob and publish its address under its name in the linkage table
		std::memcpy(s_next, blob.data(), blob.size());
		m_link.emplace(entry.first, (u64)s_next);

		// Advance to the next 16-byte aligned slot
		s_next = (void*)::align((u64)s_next + blob.size(), 16);
	}

	// Leave the allocation cursor on a page boundary
	s_next = (void*)::align((u64)s_next, 4096);
}
jit_compiler::~jit_compiler()
{
}

View File

@ -28,27 +28,30 @@ class jit_compiler final
// Execution instance
std::unique_ptr<llvm::ExecutionEngine> m_engine;
// Compiled functions
std::unordered_map<std::string, std::uintptr_t> m_map;
// Linkage cache
std::unordered_map<std::string, std::uintptr_t> m_link;
std::unordered_map<std::string, u64> m_link;
// Compiled functions
std::unordered_map<std::string, u64> m_map;
// Arch
std::string m_cpu;
public:
jit_compiler(std::unordered_map<std::string, std::uintptr_t>, std::string _cpu);
jit_compiler(std::unordered_map<std::string, u64>, std::string _cpu);
~jit_compiler();
// Compile module
void make(std::unique_ptr<llvm::Module>, std::string);
// Add module
void add(std::unique_ptr<llvm::Module> module, const std::string& path);
// Load object
void load(std::unique_ptr<llvm::Module>, std::unique_ptr<llvm::object::ObjectFile>);
// Finalize
void fin(const std::string& path);
// Add functions directly (name -> code)
void add(std::unordered_map<std::string, std::string>);
// Get compiled function address
std::uintptr_t get(const std::string& name) const
u64 get(const std::string& name) const
{
const auto found = m_map.find(name);
@ -57,7 +60,7 @@ public:
return found->second;
}
return 0;
return m_engine->getFunctionAddress(name);
}
// Get CPU info

View File

@ -324,6 +324,205 @@ namespace ppu_patterns
abort1,
abort2,
};
// Instruction pattern: rounds r3 up to a 16-byte boundary, then stores r1, r2, r14-r31,
// LR, CR, f14-f31, VRSAVE and v20-v31 at fixed offsets from it and finally sets r3 to 0.
// NOTE(review): looks like a setjmp-style context save routine — confirm against the code that matches it.
const ppu_pattern get_context[]
{
// r3 = (r3 + 15) & ~15
ADDI(r3, r3, 0xf),
CLRRDI(r3, r3, 4),
// Stack pointer, r2 and r14-r31 (offset 0x10 is not written by this pattern)
STD(r1, r3, 0),
STD(r2, r3, 8),
STD(r14, r3, 0x18),
STD(r15, r3, 0x20),
STD(r16, r3, 0x28),
STD(r17, r3, 0x30),
STD(r18, r3, 0x38),
STD(r19, r3, 0x40),
STD(r20, r3, 0x48),
STD(r21, r3, 0x50),
STD(r22, r3, 0x58),
STD(r23, r3, 0x60),
STD(r24, r3, 0x68),
STD(r25, r3, 0x70),
STD(r26, r3, 0x78),
STD(r27, r3, 0x80),
STD(r28, r3, 0x88),
STD(r29, r3, 0x90),
STD(r30, r3, 0x98),
STD(r31, r3, 0xa0),
// Link register -> 0xa8, condition register -> 0xb0
MFLR(r0),
STD(r0, r3, 0xa8),
0x7c000026, // mfcr r0
STD(r0, r3, 0xb0),
// f14-f31 -> 0xb8..0x140
STFD(f14, r3, 0xb8),
STFD(f15, r3, 0xc0),
STFD(F16, r3, 0xc8),
STFD(f17, r3, 0xd0),
STFD(f18, r3, 0xd8),
STFD(f19, r3, 0xe0),
STFD(f20, r3, 0xe8),
STFD(f21, r3, 0xf0),
STFD(f22, r3, 0xf8),
STFD(f23, r3, 0x100),
STFD(f24, r3, 0x108),
STFD(f25, r3, 0x110),
STFD(f26, r3, 0x118),
STFD(f27, r3, 0x120),
STFD(f28, r3, 0x128),
STFD(f29, r3, 0x130),
STFD(f30, r3, 0x138),
STFD(f31, r3, 0x140),
// VRSAVE -> 0x148
0x7c0042A6, // mfspr r0, vrsave
STD(r0, r3, 0x148),
// v20-v31 -> 0x150..0x200, four vectors per round; r4-r7 are the running addresses
ADDI(r4, r3, 0x150),
ADDI(r5, r3, 0x160),
ADDI(r6, r3, 0x170),
ADDI(r7, r3, 0x180),
STVX(v20, r0, r4),
STVX(v21, r0, r5),
STVX(v22, r0, r6),
STVX(v23, r0, r7),
ADDI(r4, r4, 0x40),
ADDI(r5, r5, 0x40),
ADDI(r6, r6, 0x40),
ADDI(r7, r7, 0x40),
STVX(v24, r0, r4),
STVX(v25, r0, r5),
STVX(v26, r0, r6),
STVX(v27, r0, r7),
ADDI(r4, r4, 0x40),
ADDI(r5, r5, 0x40),
ADDI(r6, r6, 0x40),
ADDI(r7, r7, 0x40),
STVX(v28, r0, r4),
STVX(v29, r0, r5),
STVX(v30, r0, r6),
STVX(v31, r0, r7),
// r3 = 0, then return
LI(r3, 0),
BLR(),
};
// Instruction pattern: rounds r3 up to a 16-byte boundary, then reloads r1, r2, r14-r31,
// LR, CR, f14-f31, VRSAVE and v20-v31 from fixed offsets and computes a result in r3 from r4.
// The offsets are the same ones the get_context pattern writes.
// NOTE(review): looks like a longjmp-style context restore routine — confirm against the code that matches it.
const ppu_pattern set_context[]
{
// r3 = (r3 + 15) & ~15
ADDI(r3, r3, 0xf),
CLRRDI(r3, r3, 4),
// Stack pointer, r2 and r14-r31
LD(r1, r3, 0),
LD(r2, r3, 8),
LD(r14, r3, 0x18),
LD(r15, r3, 0x20),
LD(r16, r3, 0x28),
LD(r17, r3, 0x30),
LD(r18, r3, 0x38),
LD(r19, r3, 0x40),
LD(r20, r3, 0x48),
LD(r21, r3, 0x50),
LD(r22, r3, 0x58),
LD(r23, r3, 0x60),
LD(r24, r3, 0x68),
LD(r25, r3, 0x70),
LD(r26, r3, 0x78),
LD(r27, r3, 0x80),
LD(r28, r3, 0x88),
LD(r29, r3, 0x90),
LD(r30, r3, 0x98),
LD(r31, r3, 0xa0),
// Link register <- 0xa8
LD(r0, r3, 0xa8),
MTLR(r0),
// Condition register <- 0xb0, written one field mask at a time
LD(r0, r3, 0xb0),
0x7c101120, // mtocrf 1, r0
0x7c102120, // mtocrf 2, r0
0x7c104120, // mtocrf 4, r0
0x7c108120, // mtocrf 8, r0
0x7c110120, // mtocrf 0x10, r0
0x7c120120, // mtocrf 0x20, r0
0x7c140120, // mtocrf 0x40, r0
0x7c180120, // mtocrf 0x80, r0
// f14-f31 <- 0xb8..0x140
LFD(f14, r3, 0xb8),
LFD(f15, r3, 0xc0),
LFD(F16, r3, 0xc8),
LFD(f17, r3, 0xd0),
LFD(f18, r3, 0xd8),
LFD(f19, r3, 0xe0),
LFD(f20, r3, 0xe8),
LFD(f21, r3, 0xf0),
LFD(f22, r3, 0xf8),
LFD(f23, r3, 0x100),
LFD(f24, r3, 0x108),
LFD(f25, r3, 0x110),
LFD(f26, r3, 0x118),
LFD(f27, r3, 0x120),
LFD(f28, r3, 0x128),
LFD(f29, r3, 0x130),
LFD(f30, r3, 0x138),
LFD(f31, r3, 0x140),
// VRSAVE <- 0x148
LD(r0, r3, 0x148),
0x7c0043A6, //mtspr vrsave, r0
// v20-v31 <- 0x150..0x200, four vectors per round; r5-r8 are the running addresses (r4 is left intact)
ADDI(r5, r3, 0x150),
ADDI(r6, r3, 0x160),
ADDI(r7, r3, 0x170),
ADDI(r8, r3, 0x180),
LVX(v20, r0, r5),
LVX(v21, r0, r6),
LVX(v22, r0, r7),
LVX(v23, r0, r8),
ADDI(r5, r5, 0x40),
ADDI(r6, r6, 0x40),
ADDI(r7, r7, 0x40),
ADDI(r8, r8, 0x40),
LVX(v24, r0, r5),
LVX(v25, r0, r6),
LVX(v26, r0, r7),
LVX(v27, r0, r8),
ADDI(r5, r5, 0x40),
ADDI(r6, r6, 0x40),
ADDI(r7, r7, 0x40),
ADDI(r8, r8, 0x40),
LVX(v28, r0, r5),
LVX(v29, r0, r6),
LVX(v30, r0, r7),
LVX(v31, r0, r8),
// Result: the subtract sets the carry only when r4 == 0, so r3 = r4 + carry,
// i.e. r4 itself, or 1 when r4 is zero
LI(r3, 0),
0x7c041810, // subfc r0, r4, r3
0x7c640194, // addze r3, r4
BLR(),
};
// Instruction pattern (the meaning of the name is not visible here).
// Reading the sequence: stores zero at the address in r6, makes r6 the stack pointer,
// pushes a 0x70-byte frame and stores zero at its top, then loads two words from the
// 32-bit address in r3 (first -> CTR, second -> r2), moves r4/r5 into r3/r4 and
// branches to CTR with link.
// NOTE(review): the two words look like a function descriptor (entry address, TOC) — confirm.
const ppu_pattern x26c[]
{
LI(r9, 0),
STD(r9, r6, 0),
MR(r1, r6),
STDU(r1, r1, -0x70),
STD(r9, r1, 0),
// r7 = low 32 bits of r3
CLRLDI(r7, r3, 32),
LWZ(r0, r7, 0),
MTCTR(r0),
LWZ(r2, r7, 4),
// Shift the remaining arguments down by one register
MR(r3, r4),
MR(r4, r5),
BCTRL(),
};
// Instruction pattern (the meaning of the name is not visible here).
// Reading the sequence: switches the stack pointer to r5 while storing the old r1 there,
// saves LR at old_r1+0x10 and r2 at new_r1+0x28, loads two words from the 32-bit address
// in r3 (first -> CTR, second -> r2), moves r4 into r3 and branches to CTR with link.
// After the call it restores r2, the old stack pointer and LR, then returns.
// NOTE(review): the two words look like a function descriptor (entry address, TOC) — confirm.
const ppu_pattern x2a0[]
{
// r8 = old stack pointer; r1 = r5 with the old r1 stored at the new top
MR(r8, r1),
0x7d212850, // subf r9, r1, r5
0x7c21496a, // stdux r1, r1, r9
MFLR(r0),
STD(r0, r8, 0x10),
STD(r2, r1, 0x28),
// r7 = low 32 bits of r3
CLRLDI(r7, r3, 32),
LWZ(r0, r7, 0),
MTCTR(r0),
LWZ(r2, r7, 4),
MR(r3, r4),
BCTRL(),
// Undo: r2, then old stack pointer (read back from the new stack top) and LR
LD(r2, r1, 0x28),
LD(r9, r1, 0x0),
LD(r0, r9, 0x10),
MTLR(r0),
MR(r1, r9),
BLR(),
};
}
std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& segs, const std::vector<std::pair<u32, u32>>& secs, u32 lib_toc, u32 entry)
@ -374,6 +573,7 @@ std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& se
func_queue.emplace_back(func);
func.addr = addr;
func.toc = toc;
func.name = fmt::format("__0x%x", func.addr);
LOG_TRACE(PPU, "Function 0x%x added (toc=0x%x)", addr, toc);
return func;
};
@ -1009,7 +1209,7 @@ std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& se
}
func.attr += ppu_attr::no_size;
add_block(iaddr);
add_block(jt_addr);
block_queue.clear();
}
else
@ -1031,6 +1231,20 @@ std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& se
block.second = _ptr.addr() - block.first;
break;
}
else if (type == ppu_itype::SC)
{
add_block(_ptr.addr());
block.second = _ptr.addr() - block.first;
break;
}
else if (type == ppu_itype::STDU && test(func.attr, ppu_attr::no_size) && (op.opcode == *_ptr || *_ptr == ppu_instructions::BLR()))
{
// Hack
LOG_SUCCESS(PPU, "[0x%x] Instruction repetition: 0x%08x", iaddr, op.opcode);
add_block(_ptr.addr());
block.second = _ptr.addr() - block.first;
break;
}
}
}
@ -1228,7 +1442,7 @@ std::vector<ppu_function> ppu_analyse(const std::vector<std::pair<u32, u32>>& se
for (auto&& pair : funcs)
{
auto& func = pair.second;
LOG_TRACE(PPU, "Function __0x%x (size=0x%x, toc=0x%x, attr %#x)", func.addr, func.size, func.toc, func.attr);
LOG_TRACE(PPU, "Function %s (size=0x%x, toc=0x%x, attr %#x)", func.name, func.size, func.toc, func.attr);
result.emplace_back(std::move(func));
}

View File

@ -17,6 +17,9 @@ enum class ppu_attr : u32
uses_r0,
entry_point,
complex_stack,
special,
//call_use_context,
//call_trace,
__bitset_enum_max
};
@ -35,6 +38,7 @@ struct ppu_function
std::map<u32, u32> blocks; // Basic blocks: addr -> size
std::set<u32> calls; // Set of called functions
std::set<u32> callers;
std::string name; // Function name
};
// PPU Module Information

View File

@ -5,13 +5,14 @@
using ppu_function_t = bool(*)(ppu_thread&);
// BIND_FUNC macro "converts" any appropriate HLE function to ppu_function_t, binding it to PPU thread context.
#define BIND_FUNC(func) (static_cast<ppu_function_t>([](ppu_thread& ppu) -> bool {\
#define BIND_FUNC(func, ...) (static_cast<ppu_function_t>([](ppu_thread& ppu) -> bool {\
const auto old_f = ppu.last_function;\
ppu.last_function = #func;\
ppu_func_detail::do_call(ppu, func);\
ppu.test_state();\
ppu.last_function = old_f;\
return true;\
ppu.cia += 4;\
__VA_ARGS__;\
return false;\
}))
struct ppu_va_args_t

View File

@ -261,7 +261,6 @@ static add_flags_result_t<u64> add64_flags(u64 a, u64 b, bool c)
extern u64 get_timebased_time();
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
extern void ppu_execute_function(ppu_thread& ppu, u32 index);
extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr);
extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr);
@ -2875,7 +2874,7 @@ bool ppu_interpreter::SC(ppu_thread& ppu, ppu_opcode_t op)
}
ppu_execute_syscall(ppu, ppu.gpr[11]);
return true;
return false;
}
bool ppu_interpreter::B(ppu_thread& ppu, ppu_opcode_t op)

View File

@ -621,12 +621,6 @@ static void ppu_load_imports(const std::shared_ptr<ppu_linkage_info>& link, u32
std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, const std::string& name)
{
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm && name == "libfiber.sprx")
{
LOG_FATAL(PPU, "libfiber.sprx is not compatible with PPU LLVM Recompiler. Use PPU Interpreter.");
Emu.Pause();
}
std::vector<std::pair<u32, u32>> segments;
std::vector<std::pair<u32, u32>> sections;

View File

@ -236,7 +236,7 @@ inline RT ppu_execute_function_or_callback(const char* name, ppu_thread& ppu, Ar
#define CALL_FUNC(ppu, func, ...) ppu_execute_function_or_callback<decltype(&func), &func>(#func, ppu, __VA_ARGS__)
#define REG_FNID(module, nid, func) ppu_module_manager::register_static_function<decltype(&func), &func>(#module, ppu_select_name(#func, nid), BIND_FUNC(func), ppu_generate_id(nid))
#define REG_FNID(module, nid, func) ppu_module_manager::register_static_function<decltype(&func), &func>(#module, ppu_select_name(#func, nid), BIND_FUNC(func, ppu.cia = (u32)ppu.lr & ~3), ppu_generate_id(nid))
#define REG_FUNC(module, func) REG_FNID(module, #func, func)

View File

@ -102,6 +102,7 @@ const ppu_decoder<ppu_interpreter_fast> s_ppu_interpreter_fast;
extern void ppu_initialize();
extern void ppu_initialize(const ppu_module& info);
static void ppu_initialize2(const ppu_module& info);
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
// Get pointer to executable cache
@ -371,7 +372,7 @@ std::string ppu_thread::dump() const
fmt::append(ret, "XER = [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer.ca, xer.ov, xer.so, xer.cnt);
fmt::append(ret, "VSCR = [SAT=%u | NJ=%u]\n", sat, nj);
fmt::append(ret, "FPSCR = [FL=%u | FG=%u | FE=%u | FU=%u]\n", fpscr.fl, fpscr.fg, fpscr.fe, fpscr.fu);
fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? 0 : cia);
fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", cia);
// Determine stack range
u32 stack_ptr = static_cast<u32>(gpr[1]);
@ -474,7 +475,11 @@ void ppu_thread::exec_task()
{
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
{
reinterpret_cast<ppu_function_t>(static_cast<std::uintptr_t>(ppu_ref(cia)))(*this);
while (!test(state, cpu_flag::ret + cpu_flag::exit + cpu_flag::stop))
{
reinterpret_cast<ppu_function_t>(static_cast<std::uintptr_t>(ppu_ref(cia)))(*this);
}
return;
}
@ -769,19 +774,21 @@ extern __m128i sse_cellbe_lvrx(u64 addr);
extern void sse_cellbe_stvlx(u64 addr, __m128i a);
extern void sse_cellbe_stvrx(u64 addr, __m128i a);
[[noreturn]] static void ppu_trap(u64 addr)
[[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr)
{
ppu.cia = ::narrow<u32>(addr);
fmt::throw_exception("Trap! (0x%llx)", addr);
}
[[noreturn]] static void ppu_unreachable(u64 addr)
// Report an unknown/illegal opcode: records the faulting address in ppu.cia, then throws
// with the opcode and the address in the message.
[[noreturn]] static void ppu_error(ppu_thread& ppu, u64 addr, u32 op)
{
fmt::throw_exception("Unreachable! (0x%llx)", addr);
ppu.cia = ::narrow<u32>(addr);
// The opcode specifier needs its percent sign ("%08x"); without it `op` was never printed
// and the arguments no longer lined up with the remaining specifier
fmt::throw_exception("Unknown/Illegal opcode 0x%08x (0x%llx)", op, addr);
}
static void ppu_check(ppu_thread& ppu, u64 addr)
{
ppu.cia = addr;
ppu.cia = ::narrow<u32>(addr);
ppu.test_state();
}
@ -867,6 +874,141 @@ static bool adde_carry(u64 a, u64 b, bool c)
#endif
}
static std::string ppu_context_prologue()
{
std::string c;
//c += "\xCC";
#ifndef _WIN32
c += "\x48\x89\xF9"; // mov rcx, rdi
#endif
c += "\x48\xB8"; // mov rax, imm64
uptr ptr = (uptr)&vm::g_base_addr;
c.append((const char*)&ptr, 8);
c += "\x48\x8B"; // mov rax, [rax]
c += '\0';
c += "\x48\x03\x41"; // add rax, [ppu+r3]
c += char(offset32(&ppu_thread::gpr, 3));
c += "\x48\x83\xC0\x0F"; // add rax, 15
c += "\x48\x83\xE0\xF0"; // and rax, -16
return c;
}
// x86-64 machine code blob, built once by an immediately-invoked lambda.
// It starts with ppu_context_prologue() (rcx = context pointer, rax = 16-byte aligned buffer address),
// then copies gpr 1 and gpr 2 from the context and a set of host registers into the buffer at rax,
// writes 0 to gpr 3 and returns.
// Buffer layout as emitted: +0x00 gpr1, +0x08 gpr2, +0x10 [rsp-8], +0x18 rsp, +0x20 rbx, +0x28 rbp,
// +0x30 rsi / +0x38 rdi (Windows only), +0x40..+0x58 r12-r15, +0x60..+0xF0 xmm6-xmm15 (Windows only).
const auto ppu_get_context = []() -> std::string
{
std::string c = ppu_context_prologue();
c += "\x48\x8B\x51"; // mov rdx, [rcx+r1]
c += char(offset32(&ppu_thread::gpr, 1));
c += "\x48\x89\x10"; // mov [rax], rdx
c += "\x48\x8B\x51"; // mov rdx, [rcx+r2]
c += char(offset32(&ppu_thread::gpr, 2));
c += "\x48\x89\x50\x08"; // mov [rax+8], rdx
// NOTE(review): reads the qword just below the stack pointer; what is expected to live there is not visible here — confirm
c += "\x48\x8B\x54\x24\xF8"; // mov rdx, [rsp-8]
c += "\x48\x89\x50\x10"; // mov [rax+0x10], rdx
c += "\x48\x89\x60\x18"; // mov [rax+0x18], rsp
c += "\x48\x89\x58\x20"; // mov [rax+0x20], rbx
c += "\x48\x89\x68\x28"; // mov [rax+0x28], rbp
#ifdef _WIN32
c += "\x48\x89\x70\x30"; // mov [rax+0x30], rsi
c += "\x48\x89\x78\x38"; // mov [rax+0x38], rdi
#endif
c += "\x4C\x89\x60\x40"; // mov [rax+0x40], r12
c += "\x4C\x89\x68\x48"; // mov [rax+0x48], r13
c += "\x4C\x89\x70\x50"; // mov [rax+0x50], r14
c += "\x4C\x89\x78\x58"; // mov [rax+0x58], r15
#ifdef _WIN32
c += "\x66\x0F\x7F\x70\x60"; // movdqa [rax+0x60], xmm6
c += "\x66\x0F\x7F\x78\x70"; // movdqa [rax+0x70], xmm7
// The literals below contain zero bytes, hence the std::string ("s") literal suffix
c += "\x66\x44\x0F\x7F\x80\x80\x00\x00\x00"s; // movdqa [rax+0x80], xmm8
c += "\x66\x44\x0F\x7F\x88\x90\x00\x00\x00"s; // movdqa [rax+0x90], xmm9
c += "\x66\x44\x0F\x7F\x90\xA0\x00\x00\x00"s; // movdqa [rax+0xA0], xmm10
c += "\x66\x44\x0F\x7F\x98\xB0\x00\x00\x00"s; // movdqa [rax+0xB0], xmm11
c += "\x66\x44\x0F\x7F\xA0\xC0\x00\x00\x00"s; // movdqa [rax+0xC0], xmm12
c += "\x66\x44\x0F\x7F\xA8\xD0\x00\x00\x00"s; // movdqa [rax+0xD0], xmm13
c += "\x66\x44\x0F\x7F\xB0\xE0\x00\x00\x00"s; // movdqa [rax+0xE0], xmm14
c += "\x66\x44\x0F\x7F\xB8\xF0\x00\x00\x00"s; // movdqa [rax+0xF0], xmm15
#endif
// The opcode is followed by the disp8 of gpr 3 and a 4-byte zero immediate
c += "\x48\xC7\x41"; // mov [rcx+r3], 0
c += char(offset32(&ppu_thread::gpr, 3));
c.append(4, '\0');
//c += "\xCC";
c += "\xC3"; // ret
return c;
}();
// x86-64 machine code blob, built once by an immediately-invoked lambda.
// It starts with ppu_context_prologue() (rcx = context pointer, rax = 16-byte aligned buffer address),
// then reloads gpr 1, gpr 2 and the host registers from the buffer at rax (same offsets that
// ppu_get_context writes), writes the saved +0x10 value back to [rsp-8], stores a result in gpr 3
// and returns.
const auto ppu_set_context = []() -> std::string
{
std::string c = ppu_context_prologue();
// 0xCC is the x86 int3 breakpoint opcode; it is active here while the same line is commented out
// in ppu_context_prologue and ppu_get_context.
// NOTE(review): looks like a debugging trap left enabled — confirm it is intentional.
c += "\xCC";
c += "\x48\x8B\x10"; // mov rdx, [rax]
c += "\x48\x89\x51"; // mov [rcx+r1], rdx
c += char(offset32(&ppu_thread::gpr, 1));
c += "\x48\x8B\x50\x08"; // mov rdx, [rax+8]
c += "\x48\x89\x51"; // mov [rcx+r2], rdx
c += char(offset32(&ppu_thread::gpr, 2));
c += "\x48\x8B\x60\x18"; // mov rsp, [rax+0x18]
c += "\x48\x8B\x58\x20"; // mov rbx, [rax+0x20]
c += "\x48\x8B\x68\x28"; // mov rbp, [rax+0x28]
#ifdef _WIN32
c += "\x48\x8B\x70\x30"; // mov rsi, [rax+0x30]
c += "\x48\x8B\x78\x38"; // mov rdi, [rax+0x38]
#endif
c += "\x4C\x8B\x60\x40"; // mov r12, [rax+0x40]
c += "\x4C\x8B\x68\x48"; // mov r13, [rax+0x48]
c += "\x4C\x8B\x70\x50"; // mov r14, [rax+0x50]
c += "\x4C\x8B\x78\x58"; // mov r15, [rax+0x58]
#ifdef _WIN32
c += "\x66\x0F\x6F\x70\x60"; // movdqa xmm6, [rax+0x60]
c += "\x66\x0F\x6F\x78\x70"; // movdqa xmm7, [rax+0x70]
// The literals below contain zero bytes, hence the std::string ("s") literal suffix
c += "\x66\x44\x0F\x6F\x80\x80\x00\x00\x00"s; // movdqa xmm8, [rax+0x80]
c += "\x66\x44\x0F\x6F\x88\x90\x00\x00\x00"s; // movdqa xmm9, [rax+0x90]
c += "\x66\x44\x0F\x6F\x90\xA0\x00\x00\x00"s; // movdqa xmm10, [rax+0xA0]
c += "\x66\x44\x0F\x6F\x98\xB0\x00\x00\x00"s; // movdqa xmm11, [rax+0xB0]
c += "\x66\x44\x0F\x6F\xA0\xC0\x00\x00\x00"s; // movdqa xmm12, [rax+0xC0]
c += "\x66\x44\x0F\x6F\xA8\xD0\x00\x00\x00"s; // movdqa xmm13, [rax+0xD0]
c += "\x66\x44\x0F\x6F\xB0\xE0\x00\x00\x00"s; // movdqa xmm14, [rax+0xE0]
c += "\x66\x44\x0F\x6F\xB8\xF0\x00\x00\x00"s; // movdqa xmm15, [rax+0xF0]
#endif
// Put the saved +0x10 value back just below the (restored) stack pointer
c += "\x48\x8B\x50\x10"; // mov rdx, [rax+0x10]
c += "\x48\x89\x54\x24\xF8"; // mov [rsp-8], rdx
// Result: gpr 3 = 1 when gpr 4 is zero, otherwise 0.
// NOTE(review): a longjmp-style restore would normally yield gpr 4 itself when it is non-zero
// (r4 + (r4 == 0)) — confirm which result is intended here.
c += "\x48\x8B\x51"; // mov rdx, [rcx+r4]
c += char(offset32(&ppu_thread::gpr, 4));
c += "\x48\x85\xD2"; // test rdx, rdx
c += "\x0F\x94\xC2"; // setz dl
c += "\x48\x0F\xB6\xD2"; // movzx rdx, dl
c += "\x48\x89\x51"; // mov [rcx+r3], rdx
c += char(offset32(&ppu_thread::gpr, 3));
c += "\xC3"; // ret
return c;
}();
// x86-64 machine code blob, built once by an immediately-invoked lambda.
// Emitted sequence:
//   mov rax, &vm::g_exec_addr ; mov rsp, [rax]   -> rsp = value stored at vm::g_exec_addr
//   add rsp, <second integer argument>           (rdx on Windows, rsi elsewhere)
//   call <third integer argument>                (r8 on Windows, rdx elsewhere)
// No ret is emitted after the call.
// NOTE(review): presumably the callee is not expected to return here — confirm.
const auto ppu_use_context = []() -> std::string
{
	std::string c;
	c += "\x48\xB8"; // mov rax, imm64
	uptr ptr = (uptr)&vm::g_exec_addr;
	c.append((const char*)&ptr, 8);
	c += "\x48\x8B\x20"; // mov rsp, [rax]
#ifdef _WIN32
	c += "\x48\x01\xD4"; // add rsp, rdx
#else
	// ModRM 0xF4 selects rsi as the source register. The previous byte, 0xFC, encoded
	// `add rsp, rdi` (the first argument), contradicting both the comment and the Windows path.
	c += "\x48\x01\xF4"; // add rsp, rsi
#endif
	//c += "\x48\x83\xE4\xE0"; // and rsp, -0x20
#ifdef _WIN32
	c += "\x41\xFF\xD0"; // call r8
#else
	c += "\xFF\xD2"; // call rdx
#endif
	return c;
}();
extern void ppu_initialize()
{
const auto _funcs = fxm::withdraw<std::vector<ppu_function>>();
@ -876,25 +1018,8 @@ extern void ppu_initialize()
return;
}
std::size_t fpos = 0;
while (fpos < _funcs->size())
{
// Split module (TODO)
ppu_module info;
info.name = fmt::format("%05X", _funcs->at(fpos).addr);
info.funcs.reserve(2000);
while (fpos < _funcs->size() && info.funcs.size() < 2000)
{
info.funcs.emplace_back(std::move(_funcs->at(fpos++)));
}
if (!Emu.IsStopped())
{
ppu_initialize(info);
}
}
// Initialize main module
ppu_initialize({"", std::move(*_funcs)});
std::vector<lv2_prx*> prx_list;
@ -903,12 +1028,10 @@ extern void ppu_initialize()
prx_list.emplace_back(&prx);
});
// Initialize preloaded libraries
for (auto ptr : prx_list)
{
if (!Emu.IsStopped())
{
ppu_initialize(*ptr);
}
ppu_initialize(*ptr);
}
}
@ -936,6 +1059,136 @@ extern void ppu_initialize(const ppu_module& info)
return;
}
#ifdef LLVM_AVAILABLE
using namespace llvm;
// Initialize JIT compiler
if (!fxm::check<jit_compiler>())
{
std::unordered_map<std::string, u64> link_table
{
{ "__mptr", (u64)&vm::g_base_addr },
{ "__cptr", (u64)&vm::g_exec_addr },
{ "__trap", (u64)&ppu_trap },
{ "__error", (u64)&ppu_error },
{ "__check", (u64)&ppu_check },
{ "__trace", (u64)&ppu_trace },
{ "__syscall", (u64)&ppu_execute_syscall },
{ "__get_tb", (u64)&get_timebased_time },
{ "__lwarx", (u64)&ppu_lwarx },
{ "__ldarx", (u64)&ppu_ldarx },
{ "__stwcx", (u64)&ppu_stwcx },
{ "__stdcx", (u64)&ppu_stdcx },
{ "__vexptefp", (u64)&sse_exp2_ps },
{ "__vlogefp", (u64)&sse_log2_ps },
{ "__vperm", (u64)&sse_altivec_vperm },
{ "__lvsl", (u64)&sse_altivec_lvsl },
{ "__lvsr", (u64)&sse_altivec_lvsr },
{ "__lvlx", (u64)&sse_cellbe_lvlx },
{ "__lvrx", (u64)&sse_cellbe_lvrx },
{ "__stvlx", (u64)&sse_cellbe_stvlx },
{ "__stvrx", (u64)&sse_cellbe_stvrx },
};
for (u64 index = 0; index < 1024; index++)
{
if (auto sc = ppu_get_syscall(index))
{
link_table.emplace(ppu_get_syscall_name(index), (u64)sc);
}
}
fxm::make<jit_compiler>(std::move(link_table), g_cfg.core.llvm_cpu);
}
#endif
// Split module into fragments <= 1 MiB
std::size_t fpos = 0;
ppu_module part;
part.funcs.reserve(65536);
while (fpos < info.funcs.size())
{
const auto fstart = fpos;
std::size_t bsize = 0;
part.funcs.clear();
while (fpos < info.funcs.size())
{
auto& func = info.funcs[fpos];
if (bsize + func.size > 1024 * 1024 && bsize)
{
break;
}
for (auto&& block : func.blocks)
{
bsize += block.second;
// Also split functions blocks into functions (TODO)
ppu_function entry;
entry.addr = block.first;
entry.size = block.second;
entry.toc = func.toc;
fmt::append(entry.name, "__0x%x", block.first);
part.funcs.emplace_back(std::move(entry));
}
fpos++;
}
part.name.clear();
if (info.name.size())
{
part.name += '-';
part.name += info.name;
}
if (fstart)
{
fmt::append(part.name, "+%06X", info.funcs.at(fstart).addr);
}
else if (fpos < info.funcs.size())
{
part.name.append("+0");
}
ppu_initialize2(part);
}
#ifdef LLVM_AVAILABLE
const auto jit = fxm::check_unlocked<jit_compiler>();
jit->fin(Emu.GetCachePath());
// Get and install function addresses
for (const auto& func : info.funcs)
{
if (!func.size) continue;
for (const auto& block : func.blocks)
{
if (block.second)
{
ppu_ref(block.first) = ::narrow<u32>(jit->get(fmt::format("__0x%x", block.first)));
}
}
}
#endif
}
static void ppu_initialize2(const ppu_module& module_part)
{
if (Emu.IsStopped())
{
return;
}
// Compute module hash
std::string obj_name;
{
@ -943,7 +1196,7 @@ extern void ppu_initialize(const ppu_module& info)
u8 output[20];
sha1_starts(&ctx);
for (const auto& func : info.funcs)
for (const auto& func : module_part.funcs)
{
if (func.size == 0)
{
@ -964,59 +1217,19 @@ extern void ppu_initialize(const ppu_module& info)
sha1_update(&ctx, vm::ps3::_ptr<const u8>(block.first), block.second);
}
sha1_update(&ctx, vm::ps3::_ptr<const u8>(func.addr), func.size);
}
sha1_finish(&ctx, output);
// Version, module name and hash: vX-liblv2.sprx-0123456789ABCDEF.obj
fmt::append(obj_name, "v1-%s-%016X.obj", info.name, reinterpret_cast<be_t<u64>&>(output));
fmt::append(obj_name, "b1%s-%016X.obj", module_part.name, reinterpret_cast<be_t<u64>&>(output));
}
#ifdef LLVM_AVAILABLE
using namespace llvm;
if (!fxm::check<jit_compiler>())
{
std::unordered_map<std::string, std::uintptr_t> link_table
{
{ "__mptr", (u64)&vm::g_base_addr },
{ "__cptr", (u64)&vm::g_exec_addr },
{ "__trap", (u64)&ppu_trap },
{ "__end", (u64)&ppu_unreachable },
{ "__check", (u64)&ppu_check },
{ "__trace", (u64)&ppu_trace },
{ "__syscall", (u64)&ppu_execute_syscall },
{ "__get_tb", (u64)&get_timebased_time },
{ "__lwarx", (u64)&ppu_lwarx },
{ "__ldarx", (u64)&ppu_ldarx },
{ "__stwcx", (u64)&ppu_stwcx },
{ "__stdcx", (u64)&ppu_stdcx },
{ "__adde_get_ca", (u64)&adde_carry },
{ "__vexptefp", (u64)&sse_exp2_ps },
{ "__vlogefp", (u64)&sse_log2_ps },
{ "__vperm", (u64)&sse_altivec_vperm },
{ "__lvsl", (u64)&sse_altivec_lvsl },
{ "__lvsr", (u64)&sse_altivec_lvsr },
{ "__lvlx", (u64)&sse_cellbe_lvlx },
{ "__lvrx", (u64)&sse_cellbe_lvrx },
{ "__stvlx", (u64)&sse_cellbe_stvlx },
{ "__stvrx", (u64)&sse_cellbe_stvrx },
};
for (u64 index = 0; index < 1024; index++)
{
if (auto sc = ppu_get_syscall(index))
{
link_table.emplace(ppu_get_syscall_name(index), (u64)sc);
}
}
const auto jit = fxm::make<jit_compiler>(std::move(link_table), g_cfg.core.llvm_cpu);
LOG_SUCCESS(PPU, "LLVM: JIT initialized (%s)", jit->cpu());
}
// Initialize compiler
const auto jit = fxm::get<jit_compiler>();
// Create LLVM module
@ -1030,216 +1243,186 @@ extern void ppu_initialize(const ppu_module& info)
// Define some types
const auto _void = Type::getVoidTy(g_llvm_ctx);
const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false);
const auto _func = FunctionType::get(_void, {translator->GetContextType()->getPointerTo()}, false);
// Initialize function list
for (const auto& func : info.funcs)
for (const auto& func : module_part.funcs)
{
if (func.size)
{
const auto f = cast<Function>(module->getOrInsertFunction(fmt::format("__0x%x", func.addr), _func));
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
f->addAttribute(1, Attribute::NoAlias);
translator->AddFunction(func.addr, f);
}
}
if (fs::file cached{Emu.GetCachePath() + obj_name})
{
std::string buf;
buf.reserve(cached.size());
cached.read(buf, cached.size());
auto buffer = llvm::MemoryBuffer::getMemBuffer(buf, obj_name);
auto result = llvm::object::ObjectFile::createObjectFile(*buffer);
if (result)
{
jit->load(std::move(module), std::move(result.get()));
std::shared_ptr<MsgDialogBase> dlg;
for (const auto& func : info.funcs)
// Check cached file
if (!fs::is_file(Emu.GetCachePath() + obj_name))
{
legacy::FunctionPassManager pm(module.get());
// Basic optimizations
pm.add(createCFGSimplificationPass());
pm.add(createPromoteMemoryToRegisterPass());
pm.add(createEarlyCSEPass());
pm.add(createTailCallEliminationPass());
pm.add(createReassociatePass());
pm.add(createInstructionCombiningPass());
//pm.add(createBasicAAWrapperPass());
//pm.add(new MemoryDependenceAnalysis());
pm.add(createLICMPass());
pm.add(createLoopInstSimplifyPass());
pm.add(createNewGVNPass());
pm.add(createDeadStoreEliminationPass());
pm.add(createSCCPPass());
pm.add(createInstructionCombiningPass());
pm.add(createInstructionSimplifierPass());
pm.add(createAggressiveDCEPass());
pm.add(createCFGSimplificationPass());
//pm.add(createLintPass()); // Check
// Initialize message dialog
dlg = Emu.GetCallbacks().get_msg_dialog();
dlg->type.se_normal = true;
dlg->type.bg_invisible = true;
dlg->type.progress_bar_count = 1;
dlg->on_close = [](s32 status)
{
Emu.CallAfter([]()
{
if (func.size)
{
const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr));
ppu_ref(func.addr) = ::narrow<u32>(uptr);
}
}
LOG_SUCCESS(PPU, "LLVM: Loaded executable: %s", obj_name);
return;
}
LOG_ERROR(PPU, "LLVM: Failed to load executable: %s", obj_name);
}
legacy::FunctionPassManager pm(module.get());
// Basic optimizations
pm.add(createCFGSimplificationPass());
pm.add(createPromoteMemoryToRegisterPass());
pm.add(createEarlyCSEPass());
pm.add(createTailCallEliminationPass());
pm.add(createReassociatePass());
pm.add(createInstructionCombiningPass());
//pm.add(createBasicAAWrapperPass());
//pm.add(new MemoryDependenceAnalysis());
pm.add(createLICMPass());
pm.add(createLoopInstSimplifyPass());
pm.add(createNewGVNPass());
pm.add(createDeadStoreEliminationPass());
pm.add(createSCCPPass());
pm.add(createInstructionCombiningPass());
pm.add(createInstructionSimplifierPass());
pm.add(createAggressiveDCEPass());
pm.add(createCFGSimplificationPass());
//pm.add(createLintPass()); // Check
// Initialize message dialog
const auto dlg = Emu.GetCallbacks().get_msg_dialog();
dlg->type.se_normal = true;
dlg->type.bg_invisible = true;
dlg->type.progress_bar_count = 1;
dlg->on_close = [](s32 status)
{
Emu.CallAfter([]()
{
// Abort everything
Emu.Stop();
});
};
Emu.CallAfter([=]()
{
dlg->Create("Compiling PPU executable: " + info.name + "\nPlease wait...");
});
// Translate functions
for (size_t fi = 0, fmax = info.funcs.size(); fi < fmax; fi++)
{
if (Emu.IsStopped())
{
LOG_SUCCESS(PPU, "LLVM: Translation cancelled");
return;
}
if (info.funcs[fi].size)
{
// Update dialog
Emu.CallAfter([=, max = info.funcs.size()]()
{
dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax));
if (fi * 100 / fmax != (fi + 1) * 100 / fmax)
dlg->ProgressBarInc(0, 1);
// Abort everything
Emu.Stop();
});
};
// Translate
const auto func = translator->TranslateToIR(info.funcs[fi], vm::_ptr<u32>(info.funcs[fi].addr));
Emu.CallAfter([=]()
{
dlg->Create("Compiling PPU module " + obj_name + "\nPlease wait...");
});
// Run optimization passes
pm.run(*func);
const auto _syscall = module->getFunction("__syscall");
for (auto i = inst_begin(*func), end = inst_end(*func); i != end;)
// Translate functions
for (size_t fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
{
if (Emu.IsStopped())
{
const auto inst = &*i++;
LOG_SUCCESS(PPU, "LLVM: Translation cancelled");
return;
}
if (const auto ci = dyn_cast<CallInst>(inst))
if (module_part.funcs[fi].size && !test(module_part.funcs[fi].attr & ppu_attr::special))
{
// Update dialog
Emu.CallAfter([=, max = module_part.funcs.size()]()
{
const auto cif = ci->getCalledFunction();
const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr;
dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax));
if (cif == _syscall && op1 && isa<ConstantInt>(op1))
if (fi * 100 / fmax != (fi + 1) * 100 / fmax)
dlg->ProgressBarInc(0, 1);
});
// Translate
const auto func = translator->Translate(module_part.funcs[fi]);
// Run optimization passes
pm.run(*func);
const auto _syscall = module->getFunction("__syscall");
for (auto i = inst_begin(*func), end = inst_end(*func); i != end;)
{
const auto inst = &*i++;
if (const auto ci = dyn_cast<CallInst>(inst))
{
// Try to determine syscall using the value from r11 (requires constant propagation)
const u64 index = cast<ConstantInt>(op1)->getZExtValue();
const auto cif = ci->getCalledFunction();
const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr;
if (const auto ptr = ppu_get_syscall(index))
if (cif == _syscall && op1 && isa<ConstantInt>(op1))
{
const auto n = ppu_get_syscall_name(index);
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
// Try to determine syscall using the value from r11 (requires constant propagation)
const u64 index = cast<ConstantInt>(op1)->getZExtValue();
// Call the syscall directly
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
if (const auto ptr = ppu_get_syscall(index))
{
const auto n = ppu_get_syscall_name(index);
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
// Call the syscall directly
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
}
}
continue;
}
continue;
}
if (const auto li = dyn_cast<LoadInst>(inst))
{
// TODO: more careful check
if (li->getNumUses() == 0)
if (const auto li = dyn_cast<LoadInst>(inst))
{
// Remove unreferenced volatile loads
li->eraseFromParent();
// TODO: more careful check
if (li->getNumUses() == 0)
{
// Remove unreferenced volatile loads
li->eraseFromParent();
}
continue;
}
continue;
}
if (const auto si = dyn_cast<StoreInst>(inst))
{
// TODO: more careful check
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
if (const auto si = dyn_cast<StoreInst>(inst))
{
// Remove undef volatile stores
si->eraseFromParent();
}
// TODO: more careful check
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
{
// Remove undef volatile stores
si->eraseFromParent();
}
continue;
continue;
}
}
}
}
}
legacy::PassManager mpm;
legacy::PassManager mpm;
// Remove unused functions, structs, global variables, etc
mpm.add(createStripDeadPrototypesPass());
//mpm.add(createFunctionInliningPass());
mpm.add(createDeadInstEliminationPass());
mpm.run(*module);
// Remove unused functions, structs, global variables, etc
mpm.add(createStripDeadPrototypesPass());
//mpm.add(createFunctionInliningPass());
mpm.add(createDeadInstEliminationPass());
mpm.run(*module);
// Update dialog
Emu.CallAfter([=]()
{
dlg->ProgressBarSetMsg(0, "Generating code...");
dlg->ProgressBarInc(0, 100);
});
std::string result;
raw_string_ostream out(result);
if (g_cfg.core.llvm_logs)
{
out << *module; // print IR
fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str());
result.clear();
}
if (verifyModule(*module, &out))
{
out.flush();
LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result);
return;
}
LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size());
jit->make(std::move(module), Emu.GetCachePath() + obj_name);
// Get and install function addresses
for (const auto& func : info.funcs)
{
if (func.size)
// Update dialog
Emu.CallAfter([=]()
{
const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr));
ppu_ref(func.addr) = ::narrow<u32>(uptr);
dlg->ProgressBarSetMsg(0, "Generating code, this may take a long time...");
dlg->ProgressBarInc(0, 100);
});
std::string result;
raw_string_ostream out(result);
if (g_cfg.core.llvm_logs)
{
out << *module; // print IR
fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str());
result.clear();
}
if (verifyModule(*module, &out))
{
out.flush();
LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result);
return;
}
LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size());
}
LOG_SUCCESS(PPU, "LLVM: Created executable: %s", obj_name);
// Access JIT compiler
if (const auto jit = fxm::check_unlocked<jit_compiler>())
{
// Load or compile module
jit->add(std::move(module), Emu.GetCachePath());
}
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -119,36 +119,18 @@ class PPUTranslator final //: public CPUTranslator
// Attributes for function calls which are "pure" and may be optimized away if their results are unused
const llvm::AttributeSet m_pure_attr;
// Available functions: types (not set or nullptr for untyped)
std::unordered_map<u64, llvm::FunctionType*> m_func_types;
// Available functions
std::unordered_map<u64, llvm::Function*> m_func_list;
// LLVM IR builder
// IR builder
llvm::IRBuilder<>* m_ir;
// LLVM function
llvm::Function* m_function;
// LLVM function type (may be null)
llvm::FunctionType* m_function_type;
// Function range
u64 m_start_addr, m_end_addr, m_current_addr;
// Basic blocks for current function
std::unordered_map<u64, llvm::BasicBlock*> m_blocks;
// JT resolver block
llvm::BasicBlock* m_jtr;
llvm::MDNode* m_md_unlikely;
llvm::MDNode* m_md_likely;
// Current binary data
be_t<u32>* m_bin{};
/* Variables */
// Memory base
@ -161,62 +143,65 @@ class PPUTranslator final //: public CPUTranslator
// Callable functions
llvm::Value* m_call;
// Main block
llvm::BasicBlock* m_body;
llvm::BasicBlock* m_entry;
// Thread context struct
llvm::StructType* m_thread_type;
llvm::Value* m_globals[96]{};
llvm::Value** const m_g_gpr = m_globals + 0;
llvm::Value** const m_g_fpr = m_globals + 32;
llvm::Value** const m_g_vr = m_globals + 64;
llvm::Value* m_locals[96]{};
llvm::Value* m_globals[169];
llvm::Value* m_locals[169];
llvm::Value** const m_gpr = m_locals + 0;
llvm::Value** const m_fpr = m_locals + 32;
llvm::Value** const m_vr = m_locals + 64;
llvm::Value** const m_cr = m_locals + 96;
llvm::Value** const m_fc = m_locals + 128;
llvm::Value* m_cr[32]{};
llvm::Value* m_g_lr;
llvm::Value* m_reg_lr;
llvm::Value* m_reg_ctr; // CTR register (counter)
llvm::Value* m_reg_vrsave;
llvm::Value* m_xer_so; // XER.SO bit, summary overflow
llvm::Value* m_xer_ov; // XER.OV bit, overflow flag
llvm::Value* m_xer_ca; // XER.CA bit, carry flag
llvm::Value* m_xer_count;
llvm::Value* m_vscr_nj; // VSCR.NJ bit, non-Java mode
llvm::Value* m_vscr_sat; // VSCR.SAT bit, sticky saturation flag
std::array<bool, 169> m_writes;
std::array<bool, 169> m_reads;
llvm::Value* m_fpscr[32]{};
llvm::Value* m_fpscr_fx; // bit 32 (first)
llvm::Value* m_fpscr_ox; // bit 35 (4th)
llvm::Value* m_fpscr_ux;
llvm::Value* m_fpscr_zx;
llvm::Value* m_fpscr_xx;
llvm::Value* m_fpscr_vxsnan;
llvm::Value* m_fpscr_vxisi;
llvm::Value* m_fpscr_vxidi;
llvm::Value* m_fpscr_vxzdz;
llvm::Value* m_fpscr_vximz;
llvm::Value* m_fpscr_vxvc;
llvm::Value* m_fpscr_fr;
llvm::Value* m_fpscr_fi;
llvm::Value* m_fpscr_c;
llvm::Value* m_fpscr_lt;
llvm::Value* m_fpscr_gt;
llvm::Value* m_fpscr_eq;
llvm::Value* m_fpscr_un;
llvm::Value* m_fpscr_reserved;
llvm::Value* m_fpscr_vxsoft;
llvm::Value* m_fpscr_vxsqrt;
llvm::Value* m_fpscr_vxcvi;
llvm::Value* m_fpscr_ve;
llvm::Value* m_fpscr_oe;
llvm::Value* m_fpscr_ue;
llvm::Value* m_fpscr_ze;
llvm::Value* m_fpscr_xe;
llvm::Value* m_fpscr_ni;
llvm::Value* m_fpscr_rnh; // RN high bit
llvm::Value* m_fpscr_rnl; // RN low bit
// Helper macro: declares two reference members that alias slot `pos` of the
// register tables, `loc` referring to m_locals[pos] and `glb` referring to
// m_globals[pos]. It is used below for slots 160..168 and undefined right after.
#define DEF_VALUE(loc, glb, pos)\
llvm::Value*& loc = m_locals[pos];\
llvm::Value*& glb = m_globals[pos];
DEF_VALUE(m_lr, m_g_lr, 160);
DEF_VALUE(m_ctr, m_g_ctr, 161); // CTR register (counter)
DEF_VALUE(m_vrsave, m_g_vrsave, 162);
DEF_VALUE(m_so, m_g_so, 163); // XER.SO bit, summary overflow
DEF_VALUE(m_ov, m_g_ov, 164); // XER.OV bit, overflow flag
DEF_VALUE(m_ca, m_g_ca, 165); // XER.CA bit, carry flag
DEF_VALUE(m_cnt, m_g_cnt, 166);
DEF_VALUE(m_nj, m_g_nj, 167); // VSCR.NJ bit, non-Java mode
DEF_VALUE(m_sat, m_g_sat, 168); // VSCR.SAT bit, sticky saturation flag
// The macro is only needed for the declarations above
#undef DEF_VALUE
// Create the alloca backing a register slot on first use.
// Does nothing when `local` is already non-null; otherwise `local` is set to a
// new AllocaInst of type GetType<T>() which is appended to the m_entry block.
template <typename T>
void RegInit(llvm::Value*& local)
{
	// Slot has been created already
	if (local)
	{
		return;
	}

	// NOTE(review): sizeof(T) is the third constructor argument, presumably the
	// alignment - confirm against the LLVM version in use
	const auto slot = new llvm::AllocaInst(GetType<T>(), nullptr, sizeof(T));
	local = slot;

	// The instruction is created detached, so attach it to the entry block
	m_entry->getInstList().push_back(slot);
}
// Emit a load from a register slot, creating the slot if necessary.
// `local` must refer to an element of m_locals: its index is used to record
// the access in m_reads. Returns the value produced by the emitted load.
template <typename T>
llvm::Value* RegLoad(llvm::Value*& local)
{
	RegInit<T>(local);

	// Position of the slot inside m_locals (bounds-checked by at())
	const auto index = &local - m_locals;
	m_reads.at(index) = true;

	llvm::Value* const loaded = m_ir->CreateLoad(local);
	return loaded;
}
// Emit a store of `value` into a register slot, creating the slot if necessary.
// `local` must refer to an element of m_locals: its index is used to record
// the access in m_writes.
template <typename T>
void RegStore(llvm::Value* value, llvm::Value*& local)
{
	RegInit<T>(local);

	// Position of the slot inside m_locals (bounds-checked by at())
	const auto index = &local - m_locals;
	m_writes.at(index) = true;

	m_ir->CreateStore(value, local);
}
public:
@ -233,10 +218,10 @@ public:
llvm::Value* RotateLeft(llvm::Value* arg, llvm::Value* n);
// Emit function call
void CallFunction(u64 target, bool tail, llvm::Value* indirect = nullptr);
void CallFunction(u64 target, llvm::Value* indirect = nullptr);
// Set some registers to undef (after function call)
void UndefineVolatileRegisters();
// Write global registers
void FlushRegisters();
// Load gpr
llvm::Value* GetGpr(u32 r, u32 num_bits = 64);
@ -339,6 +324,9 @@ public:
// Set CR field based on unsigned comparison
void SetCrFieldUnsignedCmp(u32 n, llvm::Value* a, llvm::Value* b);
// Set CR field from FPSCR CC fields
void SetCrFieldFPCC(u32 n);
// Set FPSCR CC fields provided, optionally updating CR1
void SetFPCC(llvm::Value* lt, llvm::Value* gt, llvm::Value* eq, llvm::Value* un, bool set_cr = false);
@ -376,7 +364,7 @@ public:
llvm::Value* CheckTrapCondition(u32 to, llvm::Value* left, llvm::Value* right);
// Emit trap
llvm::Value* Trap(u64 addr);
void Trap(u64 addr);
// Get condition for branch instructions
llvm::Value* CheckBranchCondition(u32 bo, u32 bi);
@ -440,11 +428,8 @@ public:
// Get thread context struct type
llvm::Type* GetContextType();
// Add function
void AddFunction(u64 addr, llvm::Function* func, llvm::FunctionType* type = nullptr);
// Parses PPU opcodes and translate them into LLVM IR
llvm::Function* TranslateToIR(const ppu_function& info, be_t<u32>* bin, void(*custom)(PPUTranslator*) = nullptr);
llvm::Function* Translate(const ppu_function& info);
void MFVSCR(ppu_opcode_t op);
void MTVSCR(ppu_opcode_t op);

View File

@ -979,6 +979,7 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
{
LOG_TODO(HLE, "Unimplemented syscall %s -> CELL_OK", ppu_get_syscall_name(code));
ppu.gpr[3] = 0;
ppu.cia += 4;
}
return;

View File

@ -348,7 +348,8 @@ namespace vm
}
}
void* real_addr = vm::base(addr);
void* real_addr = g_base_addr + addr;
void* exec_addr = g_exec_addr + addr;
#ifdef _WIN32
auto protection = flags & page_writable ? PAGE_READWRITE : (flags & page_readable ? PAGE_READONLY : PAGE_NOACCESS);
@ -455,7 +456,8 @@ namespace vm
}
}
void* real_addr = vm::base(addr);
void* real_addr = g_base_addr + addr;
void* exec_addr = g_exec_addr + addr;
#ifdef _WIN32
verify(__func__), ::VirtualFree(real_addr, size, MEM_DECOMMIT);