From f2d2a6b605894cd514ac13a376f784bc1987adbd Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 19 May 2020 19:09:27 +0300 Subject: [PATCH] JIT cleanup for PPU LLVM Remove MemoryManager3 as unnecessary. Rewrite MemoryManager1 to use its own 512M reservations. Disabled unwind info registration on all platforms. Use 64-bit executable pointers under vm::g_exec_addr area. Stop relying on deploying PPU LLVM objects in first 2G of address space. Implement jit_module_manager, protect its data with mutex. --- Utilities/JIT.cpp | 616 ++++--------------------------- Utilities/JIT.h | 12 - rpcs3/Emu/CPU/CPUThread.h | 3 - rpcs3/Emu/Cell/PPUThread.cpp | 92 +++-- rpcs3/Emu/Cell/PPUTranslator.cpp | 4 +- rpcs3/Emu/System.cpp | 4 - 6 files changed, 109 insertions(+), 622 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 146d839882..da86e3b81e 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -82,7 +82,7 @@ static u8* add_jit_memory(std::size_t size, uint align) if (pos == umax) [[unlikely]] { - jit_log.warning("JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off); + jit_log.error("JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off); return nullptr; } @@ -268,7 +268,6 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code #include "llvm/Support/FormattedStream.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/ObjectCache.h" #ifdef _MSC_VER #pragma warning(pop) @@ -282,290 +281,35 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code #include #endif -class LLVMSegmentAllocator +const bool jit_initialize = []() -> bool { -public: - // Size of virtual memory area reserved: default 512MB - static constexpr u32 DEFAULT_SEGMENT_SIZE = 0x20000000; + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); + LLVMLinkInMCJIT(); + return true; +}(); - LLVMSegmentAllocator() - { - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - llvm::InitializeNativeTargetAsmParser(); - LLVMLinkInMCJIT(); - - // Try to reserve as much virtual memory in the first 2 GB address space beforehand, if possible. - Segment found_segs[16]; - u32 num_segs = 0; -#ifdef MAP_32BIT - u64 max_size = 0x80000000u; - while (num_segs < 16) - { - auto ptr = ::mmap(nullptr, max_size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0); - if (ptr != reinterpret_cast(-1)) - found_segs[num_segs++] = Segment(ptr, static_cast(max_size)); - else if (max_size > 0x1000000) - max_size -= 0x1000000; - else - break; - } -#else - u64 start_addr = 0x10000000; - while (num_segs < 16) - { - u64 max_addr = 0; - u64 max_size = 0x1000000; - for (u64 addr = start_addr; addr <= (0x80000000u - max_size); addr += 0x1000000) - { - for (auto curr_size = max_size; (0x80000000u - curr_size) >= addr; curr_size += 0x1000000) - { - if (auto ptr = utils::memory_reserve(curr_size, reinterpret_cast(addr))) - { - if (max_addr == 0 || max_size < curr_size) - { - max_addr = addr; - max_size = curr_size; - } - utils::memory_release(ptr, curr_size); - } - else - break; - } - } - - if (max_addr == 0) - break; - - if (auto ptr = utils::memory_reserve(max_size, reinterpret_cast(max_addr))) - found_segs[num_segs++] = Segment(ptr, static_cast(max_size)); - - start_addr = max_addr + max_size; - } -#endif - if (num_segs) - { - if (num_segs > 1) - { - m_segs.resize(num_segs); - for (u32 i = 0; i < num_segs; i++) - m_segs[i] = found_segs[i]; - } - else - m_curr = found_segs[0]; - - return; - } - - if (auto ptr = utils::memory_reserve(DEFAULT_SEGMENT_SIZE)) - { - m_curr.addr = static_cast(ptr); - m_curr.size = DEFAULT_SEGMENT_SIZE; - m_curr.used = 0; - } - } - - void* allocate(u32 size) - { - if (m_curr.remaining() >= size) - return m_curr.advance(size); - - if (reserve(size)) - return m_curr.advance(size); - - return nullptr; - } - - bool reserve(u32 size) - { - if (size == 0) - return true; - - store_curr(); - - u32 best_idx = UINT_MAX; - for (u32 i = 0, segs_size = ::size32(m_segs); i < segs_size; i++) - { - const auto seg_remaining = m_segs[i].remaining(); - if (seg_remaining < size) - continue; - - if (best_idx == UINT_MAX || m_segs[best_idx].remaining() > seg_remaining) - best_idx = i; - } - - if (best_idx == UINT_MAX) - { - const auto size_to_reserve = (size > DEFAULT_SEGMENT_SIZE) ? ::align(size+4096, 4096) : DEFAULT_SEGMENT_SIZE; - if (auto ptr = utils::memory_reserve(size_to_reserve)) - { - best_idx = ::size32(m_segs); - m_segs.emplace_back(ptr, size_to_reserve); - } - else - return false; - } - - const auto& best_seg = m_segs[best_idx]; - if (best_seg.addr != m_curr.addr) - m_curr = best_seg; - - return true; - } - - std::pair current_segment() const - { - return std::make_pair(reinterpret_cast(m_curr.addr), m_curr.size); - } - - std::pair find_segment(u64 addr) const - { - for (const auto& seg: m_segs) - { - const u64 seg_addr = reinterpret_cast(seg.addr); - if (addr < seg_addr) - continue; - - const auto end_addr = seg_addr + seg.size; - if (addr < end_addr) - return std::make_pair(seg_addr, seg.size); - } - - return std::make_pair(0, 0); - } - - void reset() - { - if (m_segs.empty()) - { - if (m_curr.addr != nullptr) - { - utils::memory_decommit(m_curr.addr, m_curr.size); - m_curr.used = 0; - } - return; - } - - if (store_curr()) - m_curr = Segment(); - - auto allocated_it = std::remove_if(m_segs.begin(), m_segs.end(), [](const Segment& seg) - { - return reinterpret_cast(seg.addr + seg.size) > 0x80000000u; - }); - if (allocated_it != m_segs.end()) - { - for (auto it = allocated_it; it != m_segs.end(); ++it) - utils::memory_release(it->addr, it->size); - - m_segs.erase(allocated_it, m_segs.end()); - } - - for (auto& seg : m_segs) - { - utils::memory_decommit(seg.addr, seg.size); - seg.used = 0; - } - } - -private: - bool store_curr() - { - if (m_curr.addr != nullptr) - { - const auto wanted_addr = m_curr.addr; - auto existing_it = std::find_if(m_segs.begin(), m_segs.end(), [wanted_addr](const Segment& seg) { return seg.addr == wanted_addr; }); - if (existing_it != m_segs.end()) - existing_it->used = m_curr.used; - else - m_segs.push_back(m_curr); - - return true; - } - - return false; - } - - struct Segment - { - Segment() {} - Segment(void* addr, u32 size) - : addr(static_cast(addr)) - , size(size) - {} - - u8* addr = nullptr; - u32 size = 0; - u32 used = 0; - - u32 remaining() const - { - if (size > used) - return size - used; - - return 0; - } - void* advance(u32 offset) - { - const auto prev_used = used; - used += offset; - return &addr[prev_used]; - } - }; - - Segment m_curr; - std::vector m_segs; -}; - -// Memory manager mutex -static shared_mutex s_mutex; -// LLVM Memory allocator -static LLVMSegmentAllocator s_alloc; - -#ifdef _WIN32 -static std::deque>> s_unwater; -static std::vector> s_unwind; // .pdata -#else -static std::deque> s_unfire; -#endif - -// Reset memory manager -extern void jit_finalize() +// Simple memory manager +struct MemoryManager1 : llvm::RTDyldMemoryManager { -#ifdef _WIN32 - for (auto&& unwind : s_unwind) - { - if (!RtlDeleteFunctionTable(unwind.data())) - { - jit_log.fatal("RtlDeleteFunctionTable() failed! Error %u", GetLastError()); - } - } - - s_unwind.clear(); -#else - for (auto&& t : s_unfire) - { - llvm::RTDyldMemoryManager::deregisterEHFramesInProcess(t.first, t.second); - } - - s_unfire.clear(); -#endif - - s_alloc.reset(); -} - -// Helper class -struct MemoryManager : llvm::RTDyldMemoryManager -{ - std::unordered_map& m_link; - - std::array* m_tramps{}; - - u8* m_code_addr{}; // TODO - - MemoryManager(std::unordered_map& table) - : m_link(table) + // 256 MiB for code or data + static constexpr u64 c_max_size = 0x20000000 / 2; + + // Allocation unit + static constexpr u64 c_page_size = 4096; + + // Reserve 512 MiB + u8* const ptr = static_cast(utils::memory_reserve(c_max_size * 2)); + + u64 code_ptr = 0; + u64 data_ptr = c_max_size; + + MemoryManager1() = default; + + ~MemoryManager1() override { + utils::memory_release(ptr, c_max_size * 2); } [[noreturn]] static void null() @@ -575,185 +319,64 @@ struct MemoryManager : llvm::RTDyldMemoryManager llvm::JITSymbol findSymbol(const std::string& name) override { - auto& addr = m_link[name]; + u64 addr = RTDyldMemoryManager::getSymbolAddress(name); - // Find function address if (!addr) { - addr = RTDyldMemoryManager::getSymbolAddress(name); - - if (addr) - { - jit_log.warning("LLVM: Symbol requested: %s -> 0x%016llx", name, addr); - } - else - { - jit_log.error("LLVM: Linkage failed: %s", name); - addr = reinterpret_cast(null); - } - } - - // Verify address for small code model - const u64 code_start = reinterpret_cast(m_code_addr); - const s64 addr_diff = addr - code_start; - if (addr_diff < INT_MIN || addr_diff > INT_MAX) - { - // Lock memory manager - std::lock_guard lock(s_mutex); - - // Allocate memory for trampolines - if (m_tramps) - { - const s64 tramps_diff = reinterpret_cast(m_tramps) - code_start; - if (tramps_diff < INT_MIN || tramps_diff > INT_MAX) - m_tramps = nullptr; //previously allocated trampoline section too far away now - } - - if (!m_tramps) - { - m_tramps = reinterpret_cast(s_alloc.allocate(4096)); - utils::memory_commit(m_tramps, 4096, utils::protection::wx); - } - - // Create a trampoline - auto& data = *m_tramps++; - data[0x0] = 0xff; // JMP [rip+2] - data[0x1] = 0x25; - data[0x2] = 0x02; - data[0x3] = 0x00; - data[0x4] = 0x00; - data[0x5] = 0x00; - data[0x6] = 0x48; // MOV rax, imm64 (not executed) - data[0x7] = 0xb8; - std::memcpy(data.data() + 8, &addr, 8); - addr = reinterpret_cast(&data); - - // Reset pointer (memory page exhausted) - if ((reinterpret_cast(m_tramps) % 4096) == 0) - { - m_tramps = nullptr; - } + addr = reinterpret_cast(&null); } return {addr, llvm::JITSymbolFlags::Exported}; } - bool needsToReserveAllocationSpace() override { return true; } - void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) override + u8* allocate(u64& oldp, std::uintptr_t size, uint align, utils::protection prot) { - const u32 wanted_code_size = ::align(static_cast(CodeSize), std::min(4096u, CodeAlign)); - const u32 wanted_rodata_size = ::align(static_cast(RODataSize), std::min(4096u, RODataAlign)); - const u32 wanted_rwdata_size = ::align(static_cast(RWDataSize), std::min(4096u, RWDataAlign)); + if (align > c_page_size) + { + jit_log.fatal("JIT: Unsupported alignment (size=0x%x, align=0x%x)", size, align); + return nullptr; + } - // Lock memory manager - std::lock_guard lock(s_mutex); + const u64 olda = ::align(oldp, align); + const u64 newp = ::align(olda + size, align); - // Setup segment for current module if needed - s_alloc.reserve(wanted_code_size + wanted_rodata_size + wanted_rwdata_size); + if ((newp - 1) / c_max_size != oldp / c_max_size) + { + jit_log.fatal("JIT: Out of memory (size=0x%x, align=0x%x)", size, align); + return nullptr; + } + + if ((oldp - 1) / c_page_size != (newp - 1) / c_page_size) + { + // Allocate pages on demand + const u64 pagea = ::align(oldp, c_page_size); + const u64 psize = ::align(newp - pagea, c_page_size); + utils::memory_commit(this->ptr + pagea, psize, prot); + } + + // Update allocation counter + oldp = newp; + + return this->ptr + olda; } u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override { - void* ptr = nullptr; - const u32 wanted_size = ::align(static_cast(size), 4096); - { - // Lock memory manager - std::lock_guard lock(s_mutex); - - // Simple allocation - ptr = s_alloc.allocate(wanted_size); - } - - if (ptr == nullptr) - { - jit_log.fatal("LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); - return nullptr; - } - utils::memory_commit(ptr, size, utils::protection::wx); - m_code_addr = static_cast(ptr); - - jit_log.notice("LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), ptr, size, align); - return static_cast(ptr); + return allocate(code_ptr, size, align, utils::protection::wx); } u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override { - void* ptr = nullptr; - const u32 wanted_size = ::align(static_cast(size), 4096); - { - // Lock memory manager - std::lock_guard lock(s_mutex); - - // Simple allocation - ptr = s_alloc.allocate(wanted_size); - } - - if (ptr == nullptr) - { - jit_log.fatal("LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align); - return nullptr; - } - - if (!is_ro) - { - } - - utils::memory_commit(ptr, size); - - jit_log.notice("LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), ptr, size, align, is_ro ? "ro" : "rw"); - return static_cast(ptr); + return allocate(data_ptr, size, align, utils::protection::rw); } bool finalizeMemory(std::string* = nullptr) override { - // Lock memory manager - std::lock_guard lock(s_mutex); - - // TODO: make only read-only sections read-only -//#ifdef _WIN32 -// DWORD op; -// VirtualProtect(s_memory, (u64)m_next - (u64)s_memory, PAGE_READONLY, &op); -// VirtualProtect(s_code_addr, s_code_size, PAGE_EXECUTE_READ, &op); -//#else -// ::mprotect(s_memory, (u64)m_next - (u64)s_memory, PROT_READ); -// ::mprotect(s_code_addr, s_code_size, PROT_READ | PROT_EXEC); -//#endif return false; } void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override { - // Lock memory manager - std::lock_guard lock(s_mutex); -#ifdef _WIN32 - // Fix RUNTIME_FUNCTION records (.pdata section) - decltype(s_unwater)::value_type pdata_entry = std::move(s_unwater.front()); - s_unwater.pop_front(); - - // Use given memory segment as a BASE, compute the difference - const u64 segment_start = pdata_entry.first; - const u64 unwind_diff = (u64)addr - segment_start; - - auto& pdata = pdata_entry.second; - for (auto& rf : pdata) - { - rf.UnwindData += static_cast(unwind_diff); - } - - // Register .xdata UNWIND_INFO structs - if (!RtlAddFunctionTable(pdata.data(), (DWORD)pdata.size(), segment_start)) - { - jit_log.error("RtlAddFunctionTable() failed! Error %u", GetLastError()); - } - else - { - s_unwind.emplace_back(std::move(pdata)); - } -#else - s_unfire.push_front(std::make_pair(addr, size)); -#endif - - return RTDyldMemoryManager::registerEHFramesInProcess(addr, size); } void deregisterEHFrames() override @@ -787,14 +410,6 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override { -#ifndef _WIN32 - RTDyldMemoryManager::registerEHFramesInProcess(addr, size); - { - // Lock memory manager - std::lock_guard lock(s_mutex); - s_unfire.push_front(std::make_pair(addr, size)); - } -#endif } void deregisterEHFrames() override @@ -802,109 +417,6 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager } }; -// Simple memory manager. I promise there will be no MemoryManager4. -struct MemoryManager3 : llvm::RTDyldMemoryManager -{ - std::vector> allocs; - - MemoryManager3() = default; - - ~MemoryManager3() override - { - for (auto& a : allocs) - { - utils::memory_release(a.first, a.second); - } - } - - u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override - { - u8* r = static_cast(utils::memory_reserve(size)); - utils::memory_commit(r, size, utils::protection::wx); - allocs.emplace_back(r, size); - return r; - } - - u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override - { - u8* r = static_cast(utils::memory_reserve(size)); - utils::memory_commit(r, size); - allocs.emplace_back(r, size); - return r; - } - - bool finalizeMemory(std::string* = nullptr) override - { - return false; - } - - void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override - { - } - - void deregisterEHFrames() override - { - } -}; - -// Helper class -struct EventListener : llvm::JITEventListener -{ - MemoryManager& m_mem; - - EventListener(MemoryManager& mem) - : m_mem(mem) - { - } - - void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override - { -#ifdef _WIN32 - for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it) - { - llvm::StringRef name; - name = it->getName().get(); - - if (name == ".pdata") - { - llvm::StringRef data; - data = it->getContents().get(); - - std::vector rfs(data.size() / sizeof(RUNTIME_FUNCTION)); - - auto offsets = reinterpret_cast(rfs.data()); - - // Initialize .pdata section using relocation info - for (auto ri = it->relocation_begin(), end = it->relocation_end(); ri != end; ++ri) - { - if (ri->getType() == 3 /*R_X86_64_GOT32*/) - { - const u64 value = *reinterpret_cast(data.data() + ri->getOffset()); - offsets[ri->getOffset() / sizeof(DWORD)] = static_cast(value + ri->getSymbol()->getAddress().get()); - } - } - - // Lock memory manager - std::lock_guard lock(s_mutex); - - // Use current memory segment as a BASE, compute the difference - const u64 segment_start = s_alloc.current_segment().first; - const u64 code_diff = reinterpret_cast(m_mem.m_code_addr) - segment_start; - - // Fix RUNTIME_FUNCTION records (.pdata section) - for (auto& rf : rfs) - { - rf.BeginAddress += static_cast(code_diff); - rf.EndAddress += static_cast(code_diff); - } - - s_unwater.emplace_back(segment_start, std::move(rfs)); - } - } -#endif - } -}; - // Helper class class ObjectCache final : public llvm::ObjectCache { @@ -1107,20 +619,19 @@ std::string jit_compiler::cpu(const std::string& _cpu) } jit_compiler::jit_compiler(const std::unordered_map& _link, const std::string& _cpu, u32 flags) - : m_link(_link) - , m_cpu(cpu(_cpu)) + : m_cpu(cpu(_cpu)) { std::string result; auto null_mod = std::make_unique ("null_", m_context); - if (m_link.empty()) + if (_link.empty()) { std::unique_ptr mem; if (flags & 0x1) { - mem = std::make_unique(); + mem = std::make_unique(); } else { @@ -1141,21 +652,18 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co else { // Primary JIT - auto mem = std::make_unique(m_link); - m_jit_el = std::make_unique(*mem); - m_engine.reset(llvm::EngineBuilder(std::move(null_mod)) .setErrorStr(&result) .setEngineKind(llvm::EngineKind::JIT) - .setMCJITMemoryManager(std::move(mem)) + .setMCJITMemoryManager(std::make_unique()) .setOptLevel(llvm::CodeGenOpt::Aggressive) .setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small) .setMCPU(m_cpu) .create()); - if (m_engine) + for (auto&& [name, addr] : _link) { - m_engine->RegisterJITEventListener(m_jit_el.get()); + m_engine->addGlobalMapping(name, addr); } } diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 133832e752..f3b13218a1 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -135,15 +135,9 @@ class jit_compiler final // Local LLVM context llvm::LLVMContext m_context; - // JIT Event Listener - std::unique_ptr m_jit_el; - // Execution instance std::unique_ptr m_engine; - // Link table - std::unordered_map m_link; - // Arch std::string m_cpu; @@ -182,12 +176,6 @@ public: // Get CPU info static std::string cpu(const std::string& _cpu); - - // Check JIT purpose - bool is_primary() const - { - return !m_link.empty(); - } }; #endif diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index ac8c470cd0..25b3a6a992 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -27,9 +27,6 @@ enum class cpu_flag : u32 class cpu_thread { - // PPU cache backward compatibility hack - char dummy[sizeof(std::shared_ptr) - 8]; - public: u64 block_hash = 0; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 1555457580..6b827026fe 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -113,10 +113,9 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op); extern void do_cell_atomic_128_store(u32 addr, const void* to_write); // Get pointer to executable cache -template -static T& ppu_ref(u32 addr) +static u64& ppu_ref(u32 addr) { - return *reinterpret_cast(vm::g_exec_addr + u64{addr} * 2); + return *reinterpret_cast(vm::g_exec_addr + u64{addr} * 2); } // Get interpreter cache value @@ -128,8 +127,7 @@ static u64 ppu_cache(u32 addr) g_cfg.core.ppu_decoder == ppu_decoder_type::fast ? &g_ppu_interpreter_fast.get_table() : (fmt::throw_exception("Invalid PPU decoder"), nullptr)); - const u32 value = vm::read32(addr); - return u64{value} << 32 | ::narrow(reinterpret_cast(table[ppu_decode(value)])); + return reinterpret_cast(table[ppu_decode(vm::read32(addr))]); } static bool ppu_fallback(ppu_thread& ppu, ppu_opcode_t op) @@ -140,7 +138,6 @@ static bool ppu_fallback(ppu_thread& ppu, ppu_opcode_t op) } ppu_ref(ppu.cia) = ppu_cache(ppu.cia); - return false; } @@ -153,20 +150,17 @@ void ppu_recompiler_fallback(ppu_thread& ppu) } const auto& table = g_ppu_interpreter_fast.get_table(); - const auto cache = vm::g_exec_addr; while (true) { // Run instructions in interpreter - if (const u32 op = *reinterpret_cast(cache + u64{ppu.cia} * 2 + 4); - table[ppu_decode(op)](ppu, { op })) [[likely]] + if (const u32 op = vm::read32(ppu.cia); table[ppu_decode(op)](ppu, {op})) [[likely]] { ppu.cia += 4; continue; } - if (uptr func = *reinterpret_cast(cache + u64{ppu.cia} * 2); - func != reinterpret_cast(ppu_recompiler_fallback)) + if (uptr func = ppu_ref(ppu.cia); func != reinterpret_cast(ppu_recompiler_fallback)) { // We found a recompiler function at cia, return return; @@ -197,7 +191,8 @@ static bool ppu_check_toc(ppu_thread& ppu, ppu_opcode_t op) } // Fallback to the interpreter function - if (reinterpret_cast(ppu_cache(ppu.cia) & 0xffffffff)(ppu, op)) + const u64 val = ppu_cache(ppu.cia); + if (reinterpret_cast(val & 0xffffffff)(ppu, {static_cast(val >> 32)})) { ppu.cia += 4; } @@ -217,13 +212,12 @@ extern void ppu_register_range(u32 addr, u32 size) utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw); vm::page_protect(addr, align(size, 0x10000), 0, vm::page_executable); - const u32 fallback = ::narrow(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? - reinterpret_cast(ppu_recompiler_fallback) : reinterpret_cast(ppu_fallback)); + const u64 fallback = g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast(ppu_recompiler_fallback) : reinterpret_cast(ppu_fallback); size &= ~3; // Loop assumes `size = n * 4`, enforce that by rounding down while (size) { - ppu_ref(addr) = u64{vm::read32(addr)} << 32 | fallback; + ppu_ref(addr) = fallback; addr += 4; size -= 4; } @@ -234,7 +228,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr) // Initialize specific function if (ptr) { - ppu_ref(addr) = ::narrow(reinterpret_cast(ptr)); + ppu_ref(addr) = reinterpret_cast(ptr); return; } @@ -254,11 +248,11 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr) } // Initialize interpreter cache - const u32 _break = ::narrow(reinterpret_cast(ppu_break)); + const u64 _break = reinterpret_cast(ppu_break); while (size) { - if (ppu_ref(addr) != _break) + if (ppu_ref(addr) != _break) { ppu_ref(addr) = ppu_cache(addr); } @@ -282,7 +276,8 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op) } // Fallback to the interpreter function - if (reinterpret_cast(ppu_cache(ppu.cia) & 0xffffffff)(ppu, op)) + const u64 val = ppu_cache(ppu.cia); + if (reinterpret_cast(val)(ppu, {vm::read32(ppu.cia).get()})) { ppu.cia += 4; } @@ -298,12 +293,12 @@ extern void ppu_breakpoint(u32 addr, bool isAdding) return; } - const auto _break = ::narrow(reinterpret_cast(&ppu_break)); + const u64 _break = reinterpret_cast(&ppu_break); if (isAdding) { // Set breakpoint - ppu_ref(addr) = _break; + ppu_ref(addr) = _break; } else { @@ -320,11 +315,11 @@ extern void ppu_set_breakpoint(u32 addr) return; } - const auto _break = ::narrow(reinterpret_cast(&ppu_break)); + const u64 _break = reinterpret_cast(&ppu_break); - if (ppu_ref(addr) != _break) + if (ppu_ref(addr) != _break) { - ppu_ref(addr) = _break; + ppu_ref(addr) = _break; } } @@ -336,9 +331,9 @@ extern void ppu_remove_breakpoint(u32 addr) return; } - const auto _break = ::narrow(reinterpret_cast(&ppu_break)); + const auto _break = reinterpret_cast(&ppu_break); - if (ppu_ref(addr) == _break) + if (ppu_ref(addr) == _break) { ppu_ref(addr) = ppu_cache(addr); } @@ -371,12 +366,12 @@ extern bool ppu_patch(u32 addr, u32 value) *vm::get_super_ptr(addr) = value; - const u32 _break = ::narrow(reinterpret_cast(&ppu_break)); - const u32 fallback = ::narrow(reinterpret_cast(&ppu_fallback)); + const u64 _break = reinterpret_cast(&ppu_break); + const u64 fallback = reinterpret_cast(&ppu_fallback); if (is_exec) { - if (ppu_ref(addr) != _break && ppu_ref(addr) != fallback) + if (ppu_ref(addr) != _break && ppu_ref(addr) != fallback) { ppu_ref(addr) = ppu_cache(addr); } @@ -734,7 +729,7 @@ void ppu_thread::exec_task() { while (!(state & (cpu_flag::ret + cpu_flag::exit + cpu_flag::stop + cpu_flag::dbg_global_stop))) { - reinterpret_cast(static_cast(ppu_ref(cia)))(*this); + reinterpret_cast(ppu_ref(cia))(*this); } return; @@ -747,7 +742,7 @@ void ppu_thread::exec_task() { const auto exec_op = [this](u64 op) { - return reinterpret_cast(op & 0xffffffff)(*this, { static_cast(op >> 32) }); + return reinterpret_cast(op)(*this, {vm::read32(cia).get()}); }; if (cia % 8 || state) [[unlikely]] @@ -1773,7 +1768,7 @@ extern void ppu_initialize(const ppu_module& info) if (g_cfg.core.ppu_debug && func.size && func.toc != umax) { s_ppu_toc->emplace(func.addr, func.toc); - ppu_ref(func.addr) = ::narrow(reinterpret_cast(&ppu_check_toc)); + ppu_ref(func.addr) = reinterpret_cast(&ppu_check_toc); } } @@ -1785,8 +1780,6 @@ extern void ppu_initialize(const ppu_module& info) { std::unordered_map link_table { - { "__mptr", reinterpret_cast(&vm::g_base_addr) }, - { "__cptr", reinterpret_cast(&vm::g_exec_addr) }, { "__trap", reinterpret_cast(&ppu_trap) }, { "__error", reinterpret_cast(&ppu_error) }, { "__check", reinterpret_cast(&ppu_check) }, @@ -1799,7 +1792,6 @@ extern void ppu_initialize(const ppu_module& info) { "__stdcx", reinterpret_cast(ppu_stdcx) }, { "__vexptefp", reinterpret_cast(sse_exp2_ps) }, { "__vlogefp", reinterpret_cast(sse_log2_ps) }, - { "__vperm", s_use_ssse3 ? reinterpret_cast(sse_altivec_vperm) : reinterpret_cast(sse_altivec_vperm_v0) }, // Obsolete { "__lvsl", reinterpret_cast(sse_altivec_lvsl) }, { "__lvsr", reinterpret_cast(sse_altivec_lvsr) }, { "__lvlx", s_use_ssse3 ? reinterpret_cast(sse_cellbe_lvlx) : reinterpret_cast(sse_cellbe_lvlx_v0) }, @@ -1808,7 +1800,6 @@ extern void ppu_initialize(const ppu_module& info) { "__stvrx", s_use_ssse3 ? reinterpret_cast(sse_cellbe_stvrx) : reinterpret_cast(sse_cellbe_stvrx_v0) }, { "__dcbz", reinterpret_cast(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) }, { "__resupdate", reinterpret_cast(vm::reservation_update) }, - { "sys_config_io_event", reinterpret_cast(ppu_get_syscall(523)) }, }; for (u64 index = 0; index < 1024; index++) @@ -1862,6 +1853,7 @@ extern void ppu_initialize(const ppu_module& info) { std::vector vars; std::vector funcs; + std::shared_ptr pjit; }; struct jit_core_allocator @@ -1877,14 +1869,23 @@ extern void ppu_initialize(const ppu_module& info) } }; + struct jit_module_manager + { + shared_mutex mutex; + std::unordered_map map; + + jit_module& get(const std::string& name) + { + std::lock_guard lock(mutex); + return map.emplace(name, jit_module{}).first->second; + } + }; + // Permanently loaded compiled PPU modules (name -> data) - jit_module& jit_mod = g_fxo->get>()->emplace(cache_path + info.name, jit_module{}).first->second; + jit_module& jit_mod = g_fxo->get()->get(cache_path + info.name); // Compiler instance (deferred initialization) - std::shared_ptr jit; - - // Compiler mutex (global) - static shared_mutex jmutex; + std::shared_ptr& jit = jit_mod.pjit; // Global variables to initialize std::vector> globals; @@ -2087,7 +2088,7 @@ extern void ppu_initialize(const ppu_module& info) } // Write version, hash, CPU, settings - fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); + fmt::append(obj_name, "v3-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); } if (Emu.IsStopped()) @@ -2182,8 +2183,6 @@ extern void ppu_initialize(const ppu_module& info) return; } - std::lock_guard lock(jmutex); - for (auto [obj_name, is_compiled] : link_workload) { if (Emu.IsStopped()) @@ -2208,7 +2207,6 @@ extern void ppu_initialize(const ppu_module& info) // Jit can be null if the loop doesn't ever enter. if (jit && jit_mod.vars.empty()) { - std::lock_guard lock(jmutex); jit->fin(); // Get and install function addresses @@ -2222,7 +2220,7 @@ extern void ppu_initialize(const ppu_module& info) { const u64 addr = jit->get(fmt::format("__0x%x", block.first - reloc)); jit_mod.funcs.emplace_back(reinterpret_cast(addr)); - ppu_ref(block.first) = ::narrow(addr); + ppu_ref(block.first) = addr; } } } @@ -2253,7 +2251,7 @@ extern void ppu_initialize(const ppu_module& info) { if (block.second) { - ppu_ref(block.first) = ::narrow(reinterpret_cast(jit_mod.funcs[index++])); + ppu_ref(block.first) = reinterpret_cast(jit_mod.funcs[index++]); } } } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index af189c15ee..796d167e64 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -52,7 +52,7 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo m_thread_type = StructType::create(m_context, thread_struct, "context_t"); // Callable - m_call = new GlobalVariable(*_module, ArrayType::get(GetType(), 0x80000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__cptr%x", gsuffix)); + m_call = new GlobalVariable(*_module, ArrayType::get(GetType(), 0x40000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__cptr%x", gsuffix)); m_call->setInitializer(ConstantPointerNull::get(cast(m_call->getType()->getPointerElementType()))); m_call->setExternallyInitialized(true); @@ -331,7 +331,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) } } - const auto pos = m_ir->CreateShl(m_ir->CreateLShr(indirect, 2, "", true), 1, "", true); + const auto pos = m_ir->CreateLShr(indirect, 2, "", true); const auto ptr = m_ir->CreateGEP(m_ir->CreateLoad(m_call), {m_ir->getInt64(0), pos}); indirect = m_ir->CreateIntToPtr(m_ir->CreateLoad(ptr), type->getPointerTo()); } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index c2489676d6..d259edad58 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -1849,10 +1849,6 @@ void Emulator::Stop(bool restart) vm::close(); -#ifdef LLVM_AVAILABLE - extern void jit_finalize(); - jit_finalize(); -#endif jit_runtime::finalize(); if (restart)