mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-14 10:21:21 +00:00
LLVM: Slice PPU executable memory
This commit is contained in:
parent
7b8fee7cdb
commit
9d5b75bb7a
@ -514,8 +514,8 @@ class jit_compiler final
|
||||
atomic_t<usz> m_disk_space = umax;
|
||||
|
||||
public:
|
||||
jit_compiler(const std::unordered_map<std::string, u64>& _link, const std::string& _cpu, u32 flags = 0);
|
||||
~jit_compiler();
|
||||
jit_compiler(const std::unordered_map<std::string, u64>& _link, const std::string& _cpu, u32 flags = 0, std::function<u64(const std::string&)> symbols_cement = {}) noexcept;
|
||||
~jit_compiler() noexcept;
|
||||
|
||||
// Get LLVM context
|
||||
auto& get_context()
|
||||
|
@ -77,8 +77,7 @@ static u64 make_null_function(const std::string& name)
|
||||
|
||||
if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < 0x8000'0000)
|
||||
{
|
||||
// Point the garbage to reserved, non-executable memory
|
||||
return reinterpret_cast<u64>(vm::g_sudo_addr + addr);
|
||||
fmt::throw_exception("Unhandled symbols cementing! (name='%s'", name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,18 +173,34 @@ struct JITAnnouncer : llvm::JITEventListener
|
||||
struct MemoryManager1 : llvm::RTDyldMemoryManager
|
||||
{
|
||||
// 256 MiB for code or data
|
||||
static constexpr u64 c_max_size = 0x20000000 / 2;
|
||||
static constexpr u64 c_max_size = 0x1000'0000;
|
||||
|
||||
// Allocation unit (2M)
|
||||
static constexpr u64 c_page_size = 2 * 1024 * 1024;
|
||||
|
||||
// Reserve 512 MiB
|
||||
u8* const ptr = static_cast<u8*>(utils::memory_reserve(c_max_size * 2));
|
||||
// Reserve 256 MiB blocks
|
||||
void* m_code_mems = nullptr;
|
||||
void* m_data_ro_mems = nullptr;
|
||||
void* m_data_rw_mems = nullptr;
|
||||
|
||||
u64 code_ptr = 0;
|
||||
u64 data_ptr = c_max_size;
|
||||
u64 data_ro_ptr = 0;
|
||||
u64 data_rw_ptr = 0;
|
||||
|
||||
MemoryManager1() = default;
|
||||
// First fallback for non-existing symbols
|
||||
// May be a memory container internally
|
||||
std::function<u64(const std::string&)> m_symbols_cement;
|
||||
|
||||
// Reserve the address space used by this manager and record where each region starts.
// symbols_cement: optional first-fallback resolver for symbols that are not found otherwise.
MemoryManager1(std::function<u64(const std::string&)> symbols_cement = {}) noexcept
	: m_symbols_cement(std::move(symbols_cement))
{
	// One reservation covering three c_max_size-sized blocks
	u8* const base = static_cast<u8*>(utils::memory_reserve(c_max_size * 3));

	// First block: code
	m_code_mems = base;

	// The read-only data block is disabled; m_data_ro_mems is left untouched.
	// Re-enabling it would require shifting the writable block by another c_max_size.

	// Second block: writable data
	m_data_rw_mems = base + c_max_size;
}
|
||||
|
||||
MemoryManager1(const MemoryManager1&) = delete;
|
||||
|
||||
@ -194,13 +209,22 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
||||
~MemoryManager1() override
|
||||
{
|
||||
// Hack: don't release to prevent reuse of address space, see jit_announce
|
||||
utils::memory_decommit(ptr, c_max_size * 2);
|
||||
// constexpr auto how_much = [](u64 pos) { return utils::align(pos, pos < c_page_size ? c_page_size / 4 : c_page_size); };
|
||||
// utils::memory_decommit(m_code_mems, how_much(code_ptr));
|
||||
// utils::memory_decommit(m_data_ro_mems, how_much(data_ro_ptr));
|
||||
// utils::memory_decommit(m_data_rw_mems, how_much(data_rw_ptr));
|
||||
utils::memory_decommit(m_code_mems, c_max_size * 3);
|
||||
}
|
||||
|
||||
llvm::JITSymbol findSymbol(const std::string& name) override
|
||||
{
|
||||
u64 addr = RTDyldMemoryManager::getSymbolAddress(name);
|
||||
|
||||
if (!addr && m_symbols_cement)
|
||||
{
|
||||
addr = m_symbols_cement(name);
|
||||
}
|
||||
|
||||
if (!addr)
|
||||
{
|
||||
addr = make_null_function(name);
|
||||
@ -214,45 +238,79 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
||||
return {addr, llvm::JITSymbolFlags::Exported};
|
||||
}
|
||||
|
||||
u8* allocate(u64& oldp, uptr size, uint align, utils::protection prot)
|
||||
u8* allocate(u64& alloc_pos, void* block, uptr size, u64 align, utils::protection prot)
|
||||
{
|
||||
if (align > c_page_size)
|
||||
align = align ? align : 16;
|
||||
|
||||
const u64 sizea = utils::align(size, align);
|
||||
|
||||
if (!size || align > c_page_size || sizea > c_max_size || sizea < size)
|
||||
{
|
||||
jit_log.fatal("Unsupported alignment (size=0x%x, align=0x%x)", size, align);
|
||||
jit_log.fatal("Unsupported size/alignment (size=0x%x, align=0x%x)", size, align);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const u64 olda = utils::align(oldp, align);
|
||||
const u64 newp = utils::align(olda + size, align);
|
||||
u64 oldp = alloc_pos;
|
||||
|
||||
if ((newp - 1) / c_max_size != oldp / c_max_size)
|
||||
u64 olda = utils::align(oldp, align);
|
||||
|
||||
ensure(olda >= oldp);
|
||||
ensure(olda < ~sizea);
|
||||
|
||||
u64 newp = olda + sizea;
|
||||
|
||||
if ((newp - 1) / c_max_size != (oldp - 1) / c_max_size)
|
||||
{
|
||||
jit_log.fatal("Out of memory (size=0x%x, align=0x%x)", size, align);
|
||||
return nullptr;
|
||||
constexpr usz num_of_allocations = 1;
|
||||
|
||||
if ((newp - 1) / c_max_size > num_of_allocations)
|
||||
{
|
||||
// Allocating more than one region does not work for relocations, needs more robust solution
|
||||
fmt::throw_exception("Out of memory (size=0x%x, align=0x%x)", size, align);
|
||||
}
|
||||
}
|
||||
|
||||
if ((oldp - 1) / c_page_size != (newp - 1) / c_page_size)
|
||||
// Update allocation counter
|
||||
alloc_pos = newp;
|
||||
|
||||
constexpr usz page_quarter = c_page_size / 4;
|
||||
|
||||
// Optimization: split the first allocation to 512 KiB for single-module compilers
|
||||
if (oldp < c_page_size && align < page_quarter && (std::min(newp, c_page_size) - 1) / page_quarter != (oldp - 1) / page_quarter)
|
||||
{
|
||||
const u64 pagea = utils::align(oldp, page_quarter);
|
||||
const u64 psize = utils::align(std::min(newp, c_page_size) - pagea, page_quarter);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
|
||||
// Advance
|
||||
oldp = pagea + psize;
|
||||
}
|
||||
|
||||
if ((newp - 1) / c_page_size != (oldp - 1) / c_page_size)
|
||||
{
|
||||
// Allocate pages on demand
|
||||
const u64 pagea = utils::align(oldp, c_page_size);
|
||||
const u64 psize = utils::align(newp - pagea, c_page_size);
|
||||
utils::memory_commit(this->ptr + pagea, psize, prot);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
}
|
||||
|
||||
// Update allocation counter
|
||||
oldp = newp;
|
||||
|
||||
return this->ptr + olda;
|
||||
return reinterpret_cast<u8*>(block) + (olda % c_max_size);
|
||||
}
|
||||
|
||||
u8* allocateCodeSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/) override
|
||||
{
|
||||
return allocate(code_ptr, size, align, utils::protection::wx);
|
||||
return allocate(code_ptr, m_code_mems, size, align, utils::protection::wx);
|
||||
}
|
||||
|
||||
u8* allocateDataSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/, bool /*is_ro*/) override
|
||||
u8* allocateDataSection(uptr size, uint align, uint /*sec_id*/, llvm::StringRef /*sec_name*/, bool is_ro) override
|
||||
{
|
||||
return allocate(data_ptr, size, align, utils::protection::rw);
|
||||
if (is_ro)
|
||||
{
|
||||
// Disabled
|
||||
//return allocate(data_ro_ptr, m_data_ro_mems, size, align, utils::protection::rw);
|
||||
}
|
||||
|
||||
return allocate(data_rw_ptr, m_data_rw_mems, size, align, utils::protection::rw);
|
||||
}
|
||||
|
||||
bool finalizeMemory(std::string* = nullptr) override
|
||||
@ -272,7 +330,14 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
||||
// Simple memory manager
|
||||
struct MemoryManager2 : llvm::RTDyldMemoryManager
|
||||
{
|
||||
MemoryManager2() = default;
|
||||
// First fallback for non-existing symbols
|
||||
// May be a memory container internally
|
||||
std::function<u64(const std::string&)> m_symbols_cement;
|
||||
|
||||
MemoryManager2(std::function<u64(const std::string&)> symbols_cement = {}) noexcept
|
||||
: m_symbols_cement(std::move(symbols_cement))
|
||||
{
|
||||
}
|
||||
|
||||
~MemoryManager2() override
|
||||
{
|
||||
@ -282,6 +347,11 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager
|
||||
{
|
||||
u64 addr = RTDyldMemoryManager::getSymbolAddress(name);
|
||||
|
||||
if (!addr && m_symbols_cement)
|
||||
{
|
||||
addr = m_symbols_cement(name);
|
||||
}
|
||||
|
||||
if (!addr)
|
||||
{
|
||||
addr = make_null_function(name);
|
||||
@ -561,7 +631,7 @@ bool jit_compiler::add_sub_disk_space(ssz space)
|
||||
}).second;
|
||||
}
|
||||
|
||||
jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, const std::string& _cpu, u32 flags)
|
||||
jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, const std::string& _cpu, u32 flags, std::function<u64(const std::string&)> symbols_cement) noexcept
|
||||
: m_context(new llvm::LLVMContext)
|
||||
, m_cpu(cpu(_cpu))
|
||||
{
|
||||
@ -589,17 +659,17 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
|
||||
if (flags & 0x1)
|
||||
{
|
||||
mem = std::make_unique<MemoryManager1>();
|
||||
mem = std::make_unique<MemoryManager1>(std::move(symbols_cement));
|
||||
}
|
||||
else
|
||||
{
|
||||
mem = std::make_unique<MemoryManager2>();
|
||||
mem = std::make_unique<MemoryManager2>(std::move(symbols_cement));
|
||||
null_mod->setTargetTriple(jit_compiler::triple2());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mem = std::make_unique<MemoryManager1>();
|
||||
mem = std::make_unique<MemoryManager1>(std::move(symbols_cement));
|
||||
}
|
||||
|
||||
{
|
||||
@ -648,7 +718,7 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
}
|
||||
}
|
||||
|
||||
jit_compiler::~jit_compiler()
|
||||
jit_compiler::~jit_compiler() noexcept
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -769,7 +769,7 @@ public:
|
||||
}
|
||||
|
||||
// Move the context (if movable)
|
||||
new (static_cast<void*>(m_threads + m_count - 1)) Thread(std::string(name) + std::to_string(m_count - 1), std::forward<Context>(f));
|
||||
new (static_cast<void*>(m_threads + m_count - 1)) Thread(std::string(name) + std::to_string(m_count), std::forward<Context>(f));
|
||||
}
|
||||
|
||||
// Constructor with a function performed before adding more threads
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <deque>
|
||||
#include <span>
|
||||
#include "util/types.hpp"
|
||||
#include "util/endian.hpp"
|
||||
#include "util/asm.hpp"
|
||||
@ -38,7 +39,51 @@ struct ppu_function
|
||||
std::map<u32, u32> blocks{}; // Basic blocks: addr -> size
|
||||
std::set<u32> calls{}; // Set of called functions
|
||||
std::set<u32> callers{};
|
||||
std::string name{}; // Function name
|
||||
mutable std::string name{}; // Function name
|
||||
|
||||
struct iterator
|
||||
{
|
||||
const ppu_function* _this;
|
||||
typename std::map<u32, u32>::const_iterator it;
|
||||
usz index = 0;
|
||||
|
||||
std::pair<const u32, u32> operator*() const
|
||||
{
|
||||
return _this->blocks.empty() ? std::pair<const u32, u32>(_this->addr, _this->size) : *it;
|
||||
}
|
||||
|
||||
iterator& operator++()
|
||||
{
|
||||
index++;
|
||||
|
||||
if (it != _this->blocks.end())
|
||||
{
|
||||
it++;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const iterator& rhs) const noexcept
|
||||
{
|
||||
return it == rhs.it || (rhs.index == index && _this->blocks.empty());
|
||||
}
|
||||
|
||||
bool operator!=(const iterator& rhs) const noexcept
|
||||
{
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
|
||||
// Iterator positioned at the first entry of `blocks`, with a step counter of 0
iterator begin() const
{
	return {this, blocks.cbegin()};
}
|
||||
|
||||
// Past-the-end iterator: holds blocks.end() and a step counter of 1
iterator end() const
{
	return {this, blocks.cend(), 1};
}
|
||||
};
|
||||
|
||||
// PPU Relocation Information
|
||||
@ -87,18 +132,56 @@ struct ppu_module : public Type
|
||||
|
||||
ppu_module& operator=(ppu_module&&) noexcept = default;
|
||||
|
||||
uchar sha1[20]{};
|
||||
std::string name{};
|
||||
std::string path{};
|
||||
uchar sha1[20]{}; // Hash
|
||||
std::string name{}; // Filename
|
||||
std::string path{}; // Filepath
|
||||
s64 offset = 0; // Offset of file
|
||||
std::string cache{};
|
||||
std::vector<ppu_reloc> relocs{};
|
||||
std::vector<ppu_segment> segs{};
|
||||
std::vector<ppu_segment> secs{};
|
||||
std::vector<ppu_function> funcs{};
|
||||
std::vector<u32> applied_patches;
|
||||
std::deque<std::shared_ptr<void>> allocations;
|
||||
std::map<u32, u32> addr_to_seg_index;
|
||||
mutable bs_t<ppu_attr> attr{}; // Shared module attributes
|
||||
std::string cache{}; // Cache file path
|
||||
std::vector<ppu_reloc> relocs{}; // Relocations
|
||||
std::vector<ppu_segment> segs{}; // Segments
|
||||
std::vector<ppu_segment> secs{}; // Segment sections
|
||||
std::vector<ppu_function> funcs{}; // Function list
|
||||
std::vector<u32> applied_patches; // Patch addresses
|
||||
std::deque<std::shared_ptr<void>> allocations; // Segment memory allocations
|
||||
std::map<u32, u32> addr_to_seg_index; // address->segment ordered translator map
|
||||
ppu_module* parent = nullptr;
|
||||
std::pair<u32, u32> local_bounds{0, u32{umax}}; // Module addresses range
|
||||
std::shared_ptr<std::pair<u32, u32>> jit_bounds; // JIT instance modules addresses range
|
||||
|
||||
template <typename T>
|
||||
auto as_span(T&& arg, bool bound_local, bool bound_jit) const
|
||||
{
|
||||
using unref = std::remove_reference_t<T>;
|
||||
using type = std::conditional_t<std::is_const_v<unref>, std::add_const_t<typename unref::value_type>, typename unref::value_type>;
|
||||
|
||||
if (bound_local || bound_jit)
|
||||
{
|
||||
// Return span bound to specified bounds
|
||||
const auto [min_addr, max_addr] = bound_jit ? *jit_bounds : local_bounds;
|
||||
constexpr auto compare = [](const type& a, u32 addr) { return a.addr < addr; };
|
||||
const auto end = arg.data() + arg.size();
|
||||
const auto start = std::lower_bound(arg.data(), end, min_addr, compare);
|
||||
return std::span<type>{ start, std::lower_bound(start, end, max_addr, compare) };
|
||||
}
|
||||
|
||||
return std::span<type>(arg.data(), arg.size());
|
||||
}
|
||||
|
||||
auto get_funcs(bool bound_local = true, bool bound_jit = false)
|
||||
{
|
||||
return as_span(parent ? parent->funcs : funcs, bound_local, bound_jit);
|
||||
}
|
||||
|
||||
auto get_funcs(bool bound_local = true, bool bound_jit = false) const
|
||||
{
|
||||
return as_span(parent ? parent->funcs : funcs, bound_local, bound_jit);
|
||||
}
|
||||
|
||||
auto get_relocs(bool bound_local = false) const
|
||||
{
|
||||
return as_span(parent ? parent->relocs : relocs, bound_local, false);
|
||||
}
|
||||
|
||||
// Copy info without functions
|
||||
void copy_part(const ppu_module& info)
|
||||
@ -106,11 +189,12 @@ struct ppu_module : public Type
|
||||
std::memcpy(sha1, info.sha1, sizeof(sha1));
|
||||
name = info.name;
|
||||
path = info.path;
|
||||
relocs = info.relocs;
|
||||
segs = info.segs;
|
||||
secs = info.secs;
|
||||
allocations = info.allocations;
|
||||
addr_to_seg_index = info.addr_to_seg_index;
|
||||
parent = const_cast<ppu_module*>(&info);
|
||||
attr = info.attr;
|
||||
local_bounds = {u32{umax}, 0}; // Initially empty range
|
||||
}
|
||||
|
||||
bool analyse(u32 lib_toc, u32 entry, u32 end, const std::vector<u32>& applied, const std::vector<u32>& exported_funcs = std::vector<u32>{}, std::function<bool()> check_aborted = {});
|
||||
|
@ -66,6 +66,7 @@
|
||||
#include <cctype>
|
||||
#include <span>
|
||||
#include <optional>
|
||||
#include <charconv>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/vm.hpp"
|
||||
@ -176,7 +177,7 @@ bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_b
|
||||
extern void ppu_initialize();
|
||||
extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
|
||||
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
|
||||
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module);
|
||||
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
|
||||
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
|
||||
extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
|
||||
extern void ppu_unload_prx(const lv2_prx&);
|
||||
@ -342,11 +343,10 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
||||
// Load offset value
|
||||
c.mov(cia_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::cia))));
|
||||
// Load cia
|
||||
c.ldr(a64::w15, arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
c.ldr(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
|
||||
// Multiply by 2 to index into ptr table
|
||||
const arm::GpX index_shift = a64::x12;
|
||||
c.mov(index_shift, Imm(2));
|
||||
c.mul(pc, pc, index_shift);
|
||||
c.add(pc, pc, pc);
|
||||
|
||||
// Load call target
|
||||
const arm::GpX call_target = a64::x13;
|
||||
@ -355,7 +355,7 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
||||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.lsl(a64::w21, a64::w21, 13);
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, Imm(16));
|
||||
@ -3665,6 +3665,9 @@ struct jit_core_allocator
|
||||
// Initialize global semaphore with the max number of threads
|
||||
::semaphore<0x7fff> sem{std::max<s16>(thread_count, 1)};
|
||||
|
||||
// Mutex for special extra-large modules to compile alone
|
||||
shared_mutex shared_mtx;
|
||||
|
||||
static s16 limit()
|
||||
{
|
||||
return static_cast<s16>(std::min<s32>(0x7fff, utils::get_thread_count()));
|
||||
@ -3677,8 +3680,8 @@ namespace
|
||||
// Compiled PPU module info
|
||||
struct jit_module
|
||||
{
|
||||
void(*symbol_resolver)(u8*, u64) = nullptr;
|
||||
std::shared_ptr<jit_compiler> pjit;
|
||||
std::vector<void(*)(u8*, u64)> symbol_resolvers;
|
||||
std::vector<std::shared_ptr<jit_compiler>> pjit;
|
||||
bool init = false;
|
||||
};
|
||||
|
||||
@ -3729,6 +3732,7 @@ namespace
|
||||
}
|
||||
|
||||
to_destroy.pjit = std::move(found->second.pjit);
|
||||
to_destroy.symbol_resolvers = std::move(found->second.symbol_resolvers);
|
||||
|
||||
bucket.map.erase(found);
|
||||
}
|
||||
@ -4445,7 +4449,7 @@ extern void ppu_initialize()
|
||||
|
||||
idm::select<lv2_obj, lv2_prx>([&](u32, lv2_prx& _module)
|
||||
{
|
||||
if (_module.funcs.empty())
|
||||
if (_module.get_funcs().empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -4556,7 +4560,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
auto& ppu_toc = toc_manager.toc_map;
|
||||
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size && func.blocks.empty())
|
||||
{
|
||||
@ -4659,11 +4663,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
|
||||
|
||||
// Compiler instance (deferred initialization)
|
||||
std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
|
||||
std::vector<std::shared_ptr<jit_compiler>>& jits = jit_mod.pjit;
|
||||
|
||||
// Split module into fragments <= 1 MiB
|
||||
usz fpos = 0;
|
||||
|
||||
// Modules counted so far
|
||||
usz module_counter = 0;
|
||||
|
||||
// Difference between function name and current location
|
||||
const u32 reloc = info.relocs.empty() ? 0 : ::at32(info.segs, 0).addr;
|
||||
|
||||
@ -4684,14 +4691,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
const cpu_thread* cpu = cpu_thread::get_current();
|
||||
|
||||
for (auto& func : info.funcs)
|
||||
for (auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto& [addr, size] : func.blocks)
|
||||
for (const auto [addr, size] : func)
|
||||
{
|
||||
if (size == 0)
|
||||
{
|
||||
@ -4724,26 +4731,138 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
u32 total_compile = 0;
|
||||
|
||||
while (!jit_mod.init && fpos < info.funcs.size())
|
||||
// Limit how many modules there are per JIT instance
|
||||
// Advantage to lower the limit:
|
||||
// 1. Lowering contiguous memory requirements for allocations
|
||||
// Its disadvantage:
|
||||
// 1. B instruction can wander up to 16MB relatively to its range,
|
||||
// each additional split of JIT instance results in a downgraded version of around (100% / N-1th) - (100% / Nth) percent of instructions
|
||||
// where N is the total amount of JIT instances
|
||||
// Subject to change
|
||||
constexpr u32 c_moudles_per_jit = 100;
|
||||
|
||||
std::shared_ptr<std::pair<u32, u32>> local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
|
||||
|
||||
const auto shared_runtime = make_shared<jit_runtime>();
|
||||
const auto shared_map = make_shared<std::unordered_map<u32, u64>>();
|
||||
const auto shared_mtx = make_shared<shared_mutex>();
|
||||
|
||||
auto symbols_cement = [runtime = shared_runtime, reloc, bound = info.segs[0].addr + info.segs[0].size - reloc, func_map = shared_map, shared_mtx](const std::string& name) -> u64
|
||||
{
|
||||
// Initialize compiler instance
|
||||
if (!jit && is_being_used_in_emulation)
|
||||
u32 func_addr = umax;
|
||||
|
||||
if (name.starts_with("__0x"))
|
||||
{
|
||||
jit = std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu);
|
||||
u32 addr = umax;
|
||||
auto res = std::from_chars(name.c_str() + 4, name.c_str() + name.size(), addr, 16);
|
||||
|
||||
if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < bound)
|
||||
{
|
||||
func_addr = addr + reloc;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy module information (TODO: optimize)
|
||||
if (func_addr == umax)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
reader_lock rlock(*shared_mtx);
|
||||
|
||||
if (auto it = func_map->find(func_addr); it != func_map->end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
|
||||
rlock.upgrade();
|
||||
|
||||
u64& code_ptr = (*func_map)[func_addr];
|
||||
|
||||
if (code_ptr)
|
||||
{
|
||||
return +code_ptr;
|
||||
}
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
auto func = build_function_asm<u8*(*)(ppu_thread&, u64, u8*, u64, u64, u64)>(name, [&](native_asm& c, auto& args)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
|
||||
c.mov(x86::edx, func_addr); // Load PC
|
||||
c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::edx, 1, 0)); // Load call target
|
||||
c.mov(x86::rdx, x86::rax);
|
||||
c.shl(x86::rax, 16);
|
||||
c.shr(x86::rax, 16);
|
||||
c.shr(x86::rdx, 48);
|
||||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
c.jmp(x86::rax);
|
||||
#else
|
||||
// Load REG_Base - use absolute jump target to bypass rel jmp range limits
|
||||
// X19 contains vm::g_exec_addr
|
||||
const arm::GpX exec_addr = a64::x19;
|
||||
|
||||
// X20 contains ppu_thread*
|
||||
const arm::GpX ppu_t_base = a64::x20;
|
||||
|
||||
// Load PC
|
||||
const arm::GpX pc = a64::x15;
|
||||
const arm::GpX cia_addr_reg = a64::x11;
|
||||
|
||||
// Load offset value
|
||||
c.mov(cia_addr_reg, static_cast<u64>(::offset32(&ppu_thread::cia)));
|
||||
|
||||
// Update CIA
|
||||
c.mov(pc.w(), func_addr);
|
||||
c.str(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
|
||||
// Multiply by 2 to index into ptr table
|
||||
c.add(pc, pc, pc);
|
||||
|
||||
// Load call target
|
||||
const arm::GpX call_target = a64::x13;
|
||||
c.ldr(call_target, arm::Mem(exec_addr, pc));
|
||||
|
||||
// Compute REG_Hp
|
||||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, 16);
|
||||
c.lsr(call_target, call_target, 16);
|
||||
|
||||
// Execute LLE call
|
||||
c.br(call_target);
|
||||
#endif
|
||||
}, runtime.get());
|
||||
|
||||
code_ptr = reinterpret_cast<u64>(func);
|
||||
return code_ptr;
|
||||
};
|
||||
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
{
|
||||
info.attr += ppu_attr::has_mfvscr;
|
||||
}
|
||||
|
||||
while (!jit_mod.init && fpos < info.get_funcs().size())
|
||||
{
|
||||
// Copy module information
|
||||
ppu_module<lv2_obj> part;
|
||||
part.copy_part(info);
|
||||
part.funcs.reserve(16000);
|
||||
|
||||
// Overall block size in bytes
|
||||
usz bsize = 0;
|
||||
usz bcount = 0;
|
||||
|
||||
while (fpos < info.funcs.size())
|
||||
while (fpos < info.get_funcs().size())
|
||||
{
|
||||
auto& func = info.funcs[fpos];
|
||||
auto& func = info.get_funcs()[fpos];
|
||||
|
||||
if (!func.size)
|
||||
{
|
||||
@ -4767,9 +4886,9 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
{
|
||||
auto far_jump = ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump(source));
|
||||
|
||||
if (source == func.addr && jit)
|
||||
if (source == func.addr)
|
||||
{
|
||||
jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
|
||||
(*shared_map)[func.addr - reloc] = reinterpret_cast<u64>(far_jump);
|
||||
}
|
||||
|
||||
ppu_register_function_at(source, 4, far_jump);
|
||||
@ -4783,22 +4902,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
}
|
||||
}
|
||||
|
||||
// Copy block or function entry
|
||||
ppu_function& entry = part.funcs.emplace_back(func);
|
||||
local_jit_bounds->first = std::min<u32>(local_jit_bounds->first, func.addr);
|
||||
local_jit_bounds->second = std::max<u32>(local_jit_bounds->second, func.addr + func.size);
|
||||
|
||||
part.local_bounds.first = std::min<u32>(part.local_bounds.first, func.addr);
|
||||
part.local_bounds.second = std::max<u32>(part.local_bounds.second, func.addr + func.size);
|
||||
|
||||
// Fixup some information
|
||||
entry.name = fmt::format("__0x%x", entry.addr - reloc);
|
||||
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
{
|
||||
// TODO
|
||||
entry.attr += ppu_attr::has_mfvscr;
|
||||
}
|
||||
|
||||
if (entry.blocks.empty())
|
||||
{
|
||||
entry.blocks.emplace(func.addr, func.size);
|
||||
}
|
||||
func.name = fmt::format("__0x%x", func.addr - reloc);
|
||||
|
||||
bsize += func.size;
|
||||
|
||||
@ -4815,7 +4926,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
int has_dcbz = !!g_cfg.core.accurate_cache_line_stores;
|
||||
|
||||
for (const auto& func : part.funcs)
|
||||
for (const auto& func : part.get_funcs())
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
@ -4827,7 +4938,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(&addr), sizeof(addr));
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(&size), sizeof(size));
|
||||
|
||||
for (const auto& block : func.blocks)
|
||||
for (const auto block : func)
|
||||
{
|
||||
if (block.second == 0 || reloc)
|
||||
{
|
||||
@ -4898,7 +5009,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
sha1_update(&ctx, ensure(info.get_ptr<const u8>(func.addr)), func.size);
|
||||
}
|
||||
|
||||
if (!workload.empty() && fpos >= info.funcs.size())
|
||||
if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
|
||||
{
|
||||
// Hash the entire function grouped addresses for the integrity of the symbol resolver function
|
||||
// Potentially occurring during patches
|
||||
@ -4906,7 +5017,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
std::vector<be_t<u32>> addrs;
|
||||
|
||||
for (const ppu_function& func : info.funcs)
|
||||
constexpr auto compare = [](const ppu_function& a, u32 addr) { return a.addr < addr; };
|
||||
|
||||
const auto start = std::lower_bound(info.funcs.begin(), info.funcs.end(), local_jit_bounds->first, compare);
|
||||
|
||||
std::span<const ppu_function> span_range{ start, std::lower_bound(start, info.funcs.end(), local_jit_bounds->second, compare) };
|
||||
|
||||
for (const ppu_function& func : span_range)
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
@ -4919,7 +5036,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
// Hash its size too
|
||||
addrs.emplace_back(::size32(addrs));
|
||||
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
|
||||
if (module_counter != 0)
|
||||
{
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
|
||||
}
|
||||
|
||||
part.jit_bounds = std::move(local_jit_bounds);
|
||||
local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
|
||||
}
|
||||
|
||||
if (false)
|
||||
@ -4974,7 +5097,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented");
|
||||
if (g_cfg.core.ppu_use_nj_bit)
|
||||
settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
|
||||
if (fpos >= info.funcs.size())
|
||||
if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
|
||||
settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
@ -4986,6 +5109,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
break;
|
||||
}
|
||||
|
||||
module_counter++;
|
||||
|
||||
if (!check_only)
|
||||
{
|
||||
total_compile++;
|
||||
@ -4996,13 +5121,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
// Check object file
|
||||
if (jit_compiler::check(cache_path + obj_name))
|
||||
{
|
||||
if (!jit && !check_only)
|
||||
if (!is_being_used_in_emulation && !check_only)
|
||||
{
|
||||
ppu_log.success("LLVM: Module exists: %s", obj_name);
|
||||
|
||||
// Done already, revert total amount increase
|
||||
// Avoid incrementing "pdone" instead because it creates false appreciation for both the progress dialog and the user
|
||||
total_compile--;
|
||||
link_workload.pop_back();
|
||||
}
|
||||
|
||||
continue;
|
||||
@ -5113,11 +5239,26 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
// Keep allocating workload
|
||||
const auto& [obj_name, part] = std::as_const(workload)[i];
|
||||
|
||||
std::shared_lock rlock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||
std::unique_lock lock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||
|
||||
if (part.jit_bounds && part.parent->funcs.size() >= 0x8000)
|
||||
{
|
||||
// Make a large symbol-resolving function compile alone because it has massive memory requirements
|
||||
lock.lock();
|
||||
}
|
||||
else
|
||||
{
|
||||
rlock.lock();
|
||||
}
|
||||
|
||||
ppu_log.warning("LLVM: Compiling module %s%s", cache_path, obj_name);
|
||||
|
||||
// Use another JIT instance
|
||||
jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
|
||||
ppu_initialize2(jit2, part, cache_path, obj_name, i == workload.size() - 1 ? main_module : part);
|
||||
{
|
||||
// Use another JIT instance
|
||||
jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
|
||||
ppu_initialize2(jit2, part, cache_path, obj_name);
|
||||
}
|
||||
|
||||
ppu_log.success("LLVM: Compiled module %s", obj_name);
|
||||
}
|
||||
@ -5145,6 +5286,17 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
g_watchdog_hold_ctr--;
|
||||
}
|
||||
|
||||
// Initialize compiler instance
|
||||
while (jits.size() < utils::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
|
||||
{
|
||||
jits.emplace_back(std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu, 0, symbols_cement));
|
||||
}
|
||||
|
||||
if (jit_mod.symbol_resolvers.empty() && is_being_used_in_emulation)
|
||||
{
|
||||
jit_mod.symbol_resolvers.resize(jits.size());
|
||||
}
|
||||
|
||||
bool failed_to_load = false;
|
||||
{
|
||||
if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
|
||||
@ -5158,14 +5310,18 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
*progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_LINKING_PPU_MODULES);
|
||||
}
|
||||
|
||||
usz mod_index = umax;
|
||||
|
||||
for (const auto& [obj_name, is_compiled] : link_workload)
|
||||
{
|
||||
mod_index++;
|
||||
|
||||
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (!failed_to_load && !jit->add(cache_path + obj_name))
|
||||
if (!failed_to_load && !jits[mod_index / c_moudles_per_jit]->add(cache_path + obj_name))
|
||||
{
|
||||
ppu_log.error("LLVM: Failed to load module %s", obj_name);
|
||||
failed_to_load = true;
|
||||
@ -5205,10 +5361,10 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_APPLYING_PPU_CODE);
|
||||
|
||||
if (!jit)
|
||||
if (jits.empty())
|
||||
{
|
||||
// No functions - nothing to do
|
||||
ensure(info.funcs.empty());
|
||||
ensure(info.get_funcs().empty());
|
||||
return compiled_new;
|
||||
}
|
||||
|
||||
@ -5216,25 +5372,27 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
|
||||
if (is_first)
|
||||
{
|
||||
jit->fin();
|
||||
}
|
||||
|
||||
if (is_first)
|
||||
{
|
||||
jit_mod.symbol_resolver = reinterpret_cast<void(*)(u8*, u64)>(jit->get("__resolve_symbols"));
|
||||
ensure(jit_mod.symbol_resolver);
|
||||
}
|
||||
else
|
||||
{
|
||||
ensure(jit_mod.symbol_resolver);
|
||||
for (auto& jit : jits)
|
||||
{
|
||||
jit->fin();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Symbol resolver is in JIT mem, so we must enable execution
|
||||
pthread_jit_write_protect_np(true);
|
||||
#endif
|
||||
{
|
||||
usz index = umax;
|
||||
|
||||
jit_mod.symbol_resolver(vm::g_exec_addr, info.segs[0].addr);
|
||||
for (auto& sim : jit_mod.symbol_resolvers)
|
||||
{
|
||||
index++;
|
||||
|
||||
sim = ensure(!is_first ? sim : reinterpret_cast<void(*)(u8*, u64)>(jits[index]->get("__resolve_symbols")));
|
||||
sim(vm::g_exec_addr, info.segs[0].addr);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Symbol resolver is in JIT mem, so we must enable execution
|
||||
@ -5242,7 +5400,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
#endif
|
||||
|
||||
// Find a BLR-only function in order to copy it to all BLRs (some games need it)
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size == 4 && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
|
||||
{
|
||||
@ -5281,7 +5439,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module)
|
||||
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name)
|
||||
{
|
||||
#ifdef LLVM_AVAILABLE
|
||||
using namespace llvm;
|
||||
@ -5307,8 +5465,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
||||
translator.get_type<u64>(), // r2
|
||||
}, false);
|
||||
|
||||
// Difference between function name and current location
|
||||
const u32 reloc = module_part.get_relocs().empty() ? 0 : ::at32(module_part.segs, 0).addr;
|
||||
|
||||
// Initialize function list
|
||||
for (const auto& func : module_part.funcs)
|
||||
for (const auto& func : module_part.get_funcs())
|
||||
{
|
||||
if (func.size)
|
||||
{
|
||||
@ -5374,8 +5535,14 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
||||
fpm.addPass(EarlyCSEPass());
|
||||
#endif
|
||||
|
||||
u32 guest_code_size = 0;
|
||||
u32 min_addr = umax;
|
||||
u32 max_addr = 0;
|
||||
u32 num_func = 0;
|
||||
|
||||
// Translate functions
|
||||
for (usz fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
|
||||
// Start with the lowest bound of the module, function list is sorted
|
||||
for (const auto& mod_func : module_part.get_funcs())
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
@ -5383,10 +5550,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
||||
return;
|
||||
}
|
||||
|
||||
if (module_part.funcs[fi].size)
|
||||
if (mod_func.size)
|
||||
{
|
||||
num_func++;
|
||||
guest_code_size += mod_func.size;
|
||||
max_addr = std::max<u32>(max_addr, mod_func.addr + mod_func.size);
|
||||
min_addr = std::min<u32>(min_addr, mod_func.addr);
|
||||
|
||||
// Translate
|
||||
if (const auto func = translator.Translate(module_part.funcs[fi]))
|
||||
if (const auto func = translator.Translate(mod_func))
|
||||
{
|
||||
#ifdef ARCH_X64 // TODO
|
||||
// Run optimization passes
|
||||
@ -5405,10 +5577,10 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
||||
}
|
||||
}
|
||||
|
||||
// Run this only in one module for all functions
|
||||
if (&whole_module != &module_part)
|
||||
// Run this only in one module for all functions compiled
|
||||
if (module_part.jit_bounds)
|
||||
{
|
||||
if (const auto func = translator.GetSymbolResolver(whole_module))
|
||||
if (const auto func = translator.GetSymbolResolver(module_part))
|
||||
{
|
||||
#ifdef ARCH_X64 // TODO
|
||||
// Run optimization passes
|
||||
@ -5452,7 +5624,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
||||
return;
|
||||
}
|
||||
|
||||
ppu_log.notice("LLVM: %zu functions generated", _module->getFunctionList().size());
|
||||
ppu_log.notice("LLVM: %zu functions generated (code_size=0x%x, num_func=%d, max_addr(-)min_addr=0x%x)", _module->getFunctionList().size(), guest_code_size, num_func, max_addr - min_addr);
|
||||
}
|
||||
|
||||
// Load or compile module
|
||||
|
@ -114,7 +114,7 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
const auto caddr = m_info.segs[0].addr;
|
||||
const auto cend = caddr + m_info.segs[0].size;
|
||||
|
||||
for (const auto& rel : m_info.relocs)
|
||||
for (const auto& rel : m_info.get_relocs())
|
||||
{
|
||||
if (rel.addr >= caddr && rel.addr < cend)
|
||||
{
|
||||
@ -162,7 +162,7 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_info.relocs.empty())
|
||||
if (!m_info.get_relocs().empty())
|
||||
{
|
||||
m_reloc = &m_info.segs[0];
|
||||
}
|
||||
@ -196,7 +196,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
// Instruction address is (m_addr + base)
|
||||
const u64 base = m_reloc ? m_reloc->addr : 0;
|
||||
m_addr = info.addr - base;
|
||||
m_attr = info.attr;
|
||||
m_attr = m_info.attr + info.attr;
|
||||
|
||||
// Don't emit check in small blocks without terminator
|
||||
bool need_check = info.size >= 16;
|
||||
@ -325,6 +325,9 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||
|
||||
Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
{
|
||||
ensure(m_module->getFunction("__resolve_symbols") == nullptr);
|
||||
ensure(info.jit_bounds);
|
||||
|
||||
m_function = cast<Function>(m_module->getOrInsertFunction("__resolve_symbols", FunctionType::get(get_type<void>(), { get_type<u8*>(), get_type<u64>() }, false)).getCallee());
|
||||
|
||||
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function));
|
||||
@ -351,12 +354,13 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
// This is made in loop instead of inlined because it took tremendous amount of time to compile.
|
||||
|
||||
std::vector<u32> vec_addrs;
|
||||
vec_addrs.reserve(info.funcs.size());
|
||||
|
||||
// Create an array of function pointers
|
||||
std::vector<llvm::Constant*> functions;
|
||||
|
||||
for (const auto& f : info.funcs)
|
||||
const auto [min_addr, max_addr] = *ensure(info.jit_bounds);
|
||||
|
||||
for (const auto& f : info.get_funcs(false, true))
|
||||
{
|
||||
if (!f.size)
|
||||
{
|
||||
@ -379,7 +383,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
const auto addr_array = new GlobalVariable(*m_module, addr_array_type, false, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, vec_addrs));
|
||||
|
||||
// Create an array of function pointers
|
||||
const auto func_table_type = ArrayType::get(ftype->getPointerTo(), info.funcs.size());
|
||||
const auto func_table_type = ArrayType::get(ftype->getPointerTo(), functions.size());
|
||||
const auto init_func_table = ConstantArray::get(func_table_type, functions);
|
||||
const auto func_table = new GlobalVariable(*m_module, func_table_type, false, GlobalVariable::PrivateLinkage, init_func_table);
|
||||
|
||||
|
@ -310,6 +310,11 @@ namespace utils
|
||||
|
||||
void memory_commit(void* pointer, usz size, protection prot)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
ensure(::VirtualAlloc(pointer, size, MEM_COMMIT, +prot));
|
||||
#else
|
||||
@ -329,6 +334,11 @@ namespace utils
|
||||
|
||||
void memory_decommit(void* pointer, usz size)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
ensure(::VirtualFree(pointer, size, MEM_DECOMMIT));
|
||||
#else
|
||||
@ -357,6 +367,11 @@ namespace utils
|
||||
|
||||
void memory_reset(void* pointer, usz size, protection prot)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
memory_decommit(pointer, size);
|
||||
memory_commit(pointer, size, prot);
|
||||
@ -390,6 +405,11 @@ namespace utils
|
||||
|
||||
void memory_release(void* pointer, usz size)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
unmap_mappping_memory(reinterpret_cast<u64>(pointer), size);
|
||||
ensure(::VirtualFree(pointer, 0, MEM_RELEASE));
|
||||
@ -400,6 +420,11 @@ namespace utils
|
||||
|
||||
void memory_protect(void* pointer, usz size, protection prot)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
DWORD old;
|
||||
@ -429,6 +454,11 @@ namespace utils
|
||||
|
||||
bool memory_lock(void* pointer, usz size)
|
||||
{
|
||||
if (!size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
return ::VirtualLock(pointer, size);
|
||||
#else
|
||||
|
Loading…
x
Reference in New Issue
Block a user