From b49b4c8096568558954e11220ca42ee62e9f18fb Mon Sep 17 00:00:00 2001
From: Rajko Stojadinovic
Date: Mon, 28 Oct 2019 23:01:07 +0100
Subject: [PATCH] Rewrite the way LLVM JIT does memory allocation to allow use of more than one contiguous segment (#6771)

---
 Utilities/JIT.cpp | 343 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 284 insertions(+), 59 deletions(-)

diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp
index c95191bdb9..84a99f2ad2 100644
--- a/Utilities/JIT.cpp
+++ b/Utilities/JIT.cpp
@@ -1,4 +1,4 @@
-#include "types.h"
+#include "types.h"
 #include "JIT.h"
 #include "StrFmt.h"
 #include "File.h"
@@ -269,41 +269,237 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code
 #include <sys/mman.h>
 #endif
 
-// Memory manager mutex
-shared_mutex s_mutex;
-
-// Size of virtual memory area reserved: 512 MB
-static const u64 s_memory_size = 0x20000000;
-
-// Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible.
-static void* const s_memory = []() -> void*
+class LLVMSegmentAllocator
 {
-	llvm::InitializeNativeTarget();
-	llvm::InitializeNativeTargetAsmPrinter();
-	llvm::InitializeNativeTargetAsmParser();
-	LLVMLinkInMCJIT();
+public:
+	// Size of virtual memory area reserved: default 512MB
+	static constexpr u32 DEFAULT_SEGMENT_SIZE = 0x20000000;
 
-#ifdef MAP_32BIT
-	auto ptr = ::mmap(nullptr, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
-	if (ptr != MAP_FAILED)
-		return ptr;
-#else
-	for (u64 addr = 0x10000000; addr <= 0x80000000 - s_memory_size; addr += 0x1000000)
+	LLVMSegmentAllocator()
 	{
-		if (auto ptr = utils::memory_reserve(s_memory_size, (void*)addr))
+		llvm::InitializeNativeTarget();
+		llvm::InitializeNativeTargetAsmPrinter();
+		llvm::InitializeNativeTargetAsmParser();
+		LLVMLinkInMCJIT();
+
+		// Try to reserve as much virtual memory in the first 2 GB address space beforehand, if possible.
+		Segment found_segs[16];
+		u32 num_segs = 0;
+#ifdef MAP_32BIT
+		u64 max_size = 0x80000000u;
+		while (num_segs < 16)
 		{
-			return ptr;
+			auto ptr = ::mmap(nullptr, max_size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
+			if (ptr != MAP_FAILED)
+				found_segs[num_segs++] = Segment(ptr, u32(max_size));
+			else if (max_size > 0x1000000)
+				max_size -= 0x1000000;
+			else
+				break;
+		}
+#else
+		u64 start_addr = 0x10000000;
+		while (num_segs < 16)
+		{
+			u64 max_addr = 0;
+			u64 max_size = 0x1000000;
+			for (u64 addr = start_addr; addr <= (0x80000000u - max_size); addr += 0x1000000)
+			{
+				for (auto curr_size = max_size; (0x80000000u - curr_size) >= addr; curr_size += 0x1000000)
+				{
+					if (auto ptr = utils::memory_reserve(curr_size, (void*)addr))
+					{
+						if (max_addr == 0 || max_size < curr_size)
+						{
+							max_addr = addr;
+							max_size = curr_size;
+						}
+						utils::memory_release(ptr, curr_size);
+					}
+					else
+						break;
+				}
+			}
+
+			if (max_addr == 0)
+				break;
+
+			if (auto ptr = utils::memory_reserve(max_size, (void*)max_addr))
+				found_segs[num_segs++] = Segment(ptr, u32(max_size));
+
+			start_addr = max_addr + max_size;
+		}
+#endif
+		if (num_segs)
+		{
+			if (num_segs > 1)
+			{
+				m_segs.resize(num_segs);
+				for (u32 i = 0; i < num_segs; i++)
+					m_segs[i] = found_segs[i];
+			}
+			else
+				m_curr = found_segs[0];
+
+			return;
+		}
+
+		if (auto ptr = utils::memory_reserve(DEFAULT_SEGMENT_SIZE))
+		{
+			m_curr.addr = (u8*)ptr;
+			m_curr.size = DEFAULT_SEGMENT_SIZE;
+			m_curr.used = 0;
 		}
 	}
-#endif
 
-	return utils::memory_reserve(s_memory_size);
-}();
+	void* allocate(u32 size)
+	{
+		if (m_curr.remaining() >= size)
+			return m_curr.advance(size);
 
-static void* s_next = s_memory;
+		if (reserve(size))
+			return m_curr.advance(size);
+
+		return nullptr;
+	}
+
+	bool reserve(u32 size)
+	{
+		if (size == 0)
+			return true;
+
+		store_curr();
+
+		u32 best_idx = UINT_MAX;
+		for (u32 i = 0, segs_size = (u32)m_segs.size(); i < segs_size; i++)
+		{
+			const auto seg_remaining = m_segs[i].remaining();
+			if (seg_remaining < size)
+				continue;
+
+			if (best_idx == UINT_MAX || m_segs[best_idx].remaining() > seg_remaining)
+				best_idx = i;
+		}
+
+		if (best_idx == UINT_MAX)
+		{
+			const auto size_to_reserve = (size > DEFAULT_SEGMENT_SIZE) ? ::align(size+4096, 4096) : DEFAULT_SEGMENT_SIZE;
+			if (auto ptr = utils::memory_reserve(size_to_reserve))
+			{
+				best_idx = (u32)m_segs.size();
+				m_segs.emplace_back(ptr, size_to_reserve);
+			}
+			else
+				return false;
+		}
+
+		const auto& best_seg = m_segs[best_idx];
+		if (best_seg.addr != m_curr.addr)
+			m_curr = best_seg;
+
+		return true;
+	}
+
+	std::pair<u64, u32> current_segment() const { return std::make_pair(u64(m_curr.addr), m_curr.size); }
+	std::pair<u64, u32> find_segment(u64 addr) const
+	{
+		for (const auto& seg: m_segs)
+		{
+			if (addr < (u64)seg.addr)
+				continue;
+
+			const auto end_addr = u64(seg.addr) + seg.size;
+			if (addr < end_addr)
+				return std::make_pair(u64(seg.addr), seg.size);
+		}
+
+		return std::make_pair(0, 0);
+	}
+
+	void reset()
+	{
+		if (!m_segs.size())
+		{
+			if (m_curr.addr != nullptr)
+			{
+				utils::memory_decommit(m_curr.addr, m_curr.size);
+				m_curr.used = 0;
+			}
+			return;
+		}
+
+		if (store_curr())
+			m_curr = Segment();
+
+		auto allocated_it = std::remove_if(m_segs.begin(), m_segs.end(), [](const Segment& seg) { return u64(seg.addr + seg.size) > 0x80000000u; });
+		if (allocated_it != m_segs.end())
+		{
+			for (auto it = allocated_it; it != m_segs.end(); ++it)
+				utils::memory_release(it->addr, it->size);
+
+			m_segs.erase(allocated_it, m_segs.end());
+		}
+
+		for (auto& seg : m_segs)
+		{
+			utils::memory_decommit(seg.addr, seg.size);
+			seg.used = 0;
+		}
+	}
+
+private:
+	bool store_curr()
+	{
+		if (m_curr.addr != nullptr)
+		{
+			const auto wanted_addr = m_curr.addr;
+			auto existing_it = std::find_if(m_segs.begin(), m_segs.end(), [wanted_addr](const Segment& seg) { return seg.addr == wanted_addr; });
+			if (existing_it != m_segs.end())
+				existing_it->used = m_curr.used;
+			else
+				m_segs.push_back(m_curr);
+
+			return true;
+		}
+
+		return false;
+	}
+
+	struct Segment
+	{
+		Segment() {}
+		Segment(void* addr, u32 size) : addr((u8*)addr), size(size) {}
+
+		u8* addr = nullptr;
+		u32 size = 0;
+		u32 used = 0;
+
+		u32 remaining() const
+		{
+			if (size > used)
+				return size - used;
+
+			return 0;
+		}
+		void* advance(u32 offset)
+		{
+			const auto prev_used = used;
+			used += offset;
+			return &addr[prev_used];
+		}
+	};
+
+	Segment m_curr;
+	std::vector<Segment> m_segs;
+};
+
+// Memory manager mutex
+static shared_mutex s_mutex;
+// LLVM Memory allocator
+static LLVMSegmentAllocator s_alloc;
 
 #ifdef _WIN32
-static std::deque<std::vector<RUNTIME_FUNCTION>> s_unwater;
+static std::deque<std::pair<u64, std::vector<RUNTIME_FUNCTION>>> s_unwater;
 static std::vector<std::vector<RUNTIME_FUNCTION>> s_unwind; // .pdata
 #else
 static std::deque<std::pair<u64, u64>> s_unfire;
@@ -331,9 +527,7 @@ extern void jit_finalize()
 	s_unfire.clear();
 #endif
 
-	utils::memory_decommit(s_memory, s_memory_size);
-
-	s_next = s_memory;
+	s_alloc.reset();
 }
 
 // Helper class
@@ -376,17 +570,25 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		}
 
 		// Verify address for small code model
-		if ((u64)s_memory > 0x80000000 - s_memory_size ? (u64)addr - (u64)s_memory >= s_memory_size : addr >= 0x80000000)
+		const u64 code_start = u64(m_code_addr);
+		const s64 addr_diff = addr - code_start;
+		if (addr_diff < INT_MIN || addr_diff > INT_MAX)
 		{
 			// Lock memory manager
 			std::lock_guard lock(s_mutex);
 
 			// Allocate memory for trampolines
+			if (m_tramps)
+			{
+				const s64 tramps_diff = u64(m_tramps) - code_start;
+				if (tramps_diff < INT_MIN || tramps_diff > INT_MAX)
+					m_tramps = nullptr; //previously allocated trampoline section too far away now
+			}
+
 			if (!m_tramps)
 			{
-				m_tramps = reinterpret_cast<decltype(m_tramps)>(s_next);
-				utils::memory_commit(s_next, 4096, utils::protection::wx);
-				s_next = (u8*)((u64)s_next + 4096);
+				m_tramps = reinterpret_cast<decltype(m_tramps)>(s_alloc.allocate(4096));
+				utils::memory_commit(m_tramps, 4096, utils::protection::wx);
 			}
 
 			// Create a trampoline
@@ -412,36 +614,57 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		return {addr, llvm::JITSymbolFlags::Exported};
 	}
 
-	u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
+	bool needsToReserveAllocationSpace() override { return true; }
+	void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) override
 	{
+		const u32 wanted_code_size = ::align(u32(CodeSize), std::min(4096u, CodeAlign));
+		const u32 wanted_rodata_size = ::align(u32(RODataSize), std::min(4096u, RODataAlign));
+		const u32 wanted_rwdata_size = ::align(u32(RWDataSize), std::min(4096u, RWDataAlign));
+
 		// Lock memory manager
 		std::lock_guard lock(s_mutex);
 
-		// Simple allocation
-		const u64 next = ::align((u64)s_next + size, 4096);
+		// Setup segment for current module if needed
+		s_alloc.reserve(wanted_code_size + wanted_rodata_size + wanted_rwdata_size);
+	}
 
-		if (next > (u64)s_memory + s_memory_size)
+	u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
+	{
+		void* ptr = nullptr;
+		const u32 wanted_size = ::align(u32(size), 4096);
+		{
+			// Lock memory manager
+			std::lock_guard lock(s_mutex);
+
+			// Simple allocation
+			ptr = s_alloc.allocate(wanted_size);
+		}
+
+		if (ptr == nullptr)
 		{
 			LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
 			return nullptr;
 		}
+		utils::memory_commit(ptr, size, utils::protection::wx);
+		m_code_addr = (u8*)ptr;
 
-		utils::memory_commit(s_next, size, utils::protection::wx);
-		m_code_addr = (u8*)s_next;
-
-		LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), s_next, size, align);
-		return (u8*)std::exchange(s_next, (void*)next);
+		LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), ptr, size, align);
+		return (u8*)ptr;
 	}
 
 	u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
 	{
-		// Lock memory manager
-		std::lock_guard lock(s_mutex);
+		void* ptr = nullptr;
+		const u32 wanted_size = ::align(u32(size), 4096);
+		{
+			// Lock memory manager
+			std::lock_guard lock(s_mutex);
 
-		// Simple allocation
-		const u64 next = ::align((u64)s_next + size, 4096);
+			// Simple allocation
+			ptr = s_alloc.allocate(wanted_size);
+		}
 
-		if (next > (u64)s_memory + s_memory_size)
+		if (ptr == nullptr)
 		{
 			LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
 			return nullptr;
@@ -451,10 +674,10 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		{
 		}
 
-		utils::memory_commit(s_next, size);
+		utils::memory_commit(ptr, size);
 
-		LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), s_next, size, align, is_ro ? "ro" : "rw");
-		return (u8*)std::exchange(s_next, (void*)next);
+		LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), ptr, size, align, is_ro ? "ro" : "rw");
+		return (u8*)ptr;
 	}
 
 	bool finalizeMemory(std::string* = nullptr) override
@@ -479,21 +702,22 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 #ifdef _WIN32
 		// Lock memory manager
 		std::lock_guard lock(s_mutex);
-
-		// Use s_memory as a BASE, compute the difference
-		const u64 unwind_diff = (u64)addr - (u64)s_memory;
-
 		// Fix RUNTIME_FUNCTION records (.pdata section)
-		auto pdata = std::move(s_unwater.front());
+		decltype(s_unwater)::value_type pdata_entry = std::move(s_unwater.front());
 		s_unwater.pop_front();
 
+		// Use given memory segment as a BASE, compute the difference
+		const u64 segment_start = pdata_entry.first;
+		const u64 unwind_diff = (u64)addr - segment_start;
+
+		auto& pdata = pdata_entry.second;
 		for (auto& rf : pdata)
 		{
 			rf.UnwindData += static_cast<DWORD>(unwind_diff);
 		}
 
 		// Register .xdata UNWIND_INFO structs
-		if (!RtlAddFunctionTable(pdata.data(), (DWORD)pdata.size(), (u64)s_memory))
+		if (!RtlAddFunctionTable(pdata.data(), (DWORD)pdata.size(), segment_start))
 		{
 			LOG_ERROR(GENERAL, "RtlAddFunctionTable() failed! Error %u", GetLastError());
 		}
@@ -635,8 +859,9 @@ struct EventListener : llvm::JITEventListener
 		// Lock memory manager
 		std::lock_guard lock(s_mutex);
 
-		// Use s_memory as a BASE, compute the difference
-		const u64 code_diff = (u64)m_mem.m_code_addr - (u64)s_memory;
+		// Use current memory segment as a BASE, compute the difference
+		const u64 segment_start = s_alloc.current_segment().first;
+		const u64 code_diff = u64(m_mem.m_code_addr) - segment_start;
 
 		// Fix RUNTIME_FUNCTION records (.pdata section)
 		for (auto& rf : rfs)
@@ -645,7 +870,7 @@ struct EventListener : llvm::JITEventListener
 			rf.EndAddress += static_cast<DWORD>(code_diff);
 		}
 
-		s_unwater.emplace_back(std::move(rfs));
+		s_unwater.emplace_back(segment_start, std::move(rfs));
 	}
 }
 #endif
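
Illustrative sketch (not part of the patch above): the new LLVMSegmentAllocator bump-allocates out of a current segment, falls back to the existing segment with the least remaining space that still fits the request, and only then reserves a fresh segment (over-sized if a single request exceeds the default). The self-contained C++ below demonstrates that strategy under stated assumptions; names such as reserve_stub and the 1 MB default size are stand-ins for this sketch only, while the real code goes through utils::memory_reserve with a 512 MB default segment and extra low-2 GB placement logic.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Stand-in for utils::memory_reserve: any large, page-aligned reservation would do here.
static std::uint8_t* reserve_stub(std::uint32_t size)
{
	return static_cast<std::uint8_t*>(std::calloc(size, 1));
}

struct Segment
{
	std::uint8_t* addr = nullptr;
	std::uint32_t size = 0;
	std::uint32_t used = 0;

	std::uint32_t remaining() const { return size > used ? size - used : 0; }
	void* advance(std::uint32_t n) { void* p = addr + used; used += n; return p; }
};

class SegmentAllocator
{
public:
	// Bump-allocate from the current segment; otherwise switch to the best-fit
	// segment (smallest remaining space that still fits), reserving a new one
	// only when no existing segment can satisfy the request.
	void* allocate(std::uint32_t size)
	{
		if (m_curr.remaining() < size && !switch_to_best_fit(size))
			return nullptr;

		return m_curr.advance(size);
	}

private:
	bool switch_to_best_fit(std::uint32_t size)
	{
		if (m_curr.addr)
			m_segs.push_back(m_curr); // remember how much of the old segment was used

		std::size_t best = m_segs.size();
		for (std::size_t i = 0; i < m_segs.size(); i++)
		{
			if (m_segs[i].remaining() >= size && (best == m_segs.size() || m_segs[i].remaining() < m_segs[best].remaining()))
				best = i;
		}

		if (best == m_segs.size())
		{
			// Nothing fits: reserve a fresh segment, over-sized for big requests
			Segment fresh;
			fresh.size = size > c_default_size ? size : c_default_size;
			fresh.addr = reserve_stub(fresh.size);
			if (!fresh.addr)
				return false;
			m_segs.push_back(fresh);
		}

		m_curr = m_segs[best];
		m_segs.erase(m_segs.begin() + best);
		return true;
	}

	static constexpr std::uint32_t c_default_size = 0x100000; // 1 MB, for the sketch only
	Segment m_curr;
	std::vector<Segment> m_segs;
};

int main()
{
	SegmentAllocator alloc;
	void* a = alloc.allocate(4096);
	void* b = alloc.allocate(0x200000); // larger than the default: gets its own segment
	std::printf("a=%p b=%p\n", a, b);
}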
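A second, smaller sketch of the displacement test behind the reworked trampoline handling in getSymbolAddress(): under the small code model a call target must lie within a signed 32-bit displacement of the generated code, so the patch checks both the symbol address and any previously allocated trampoline block against that range (and drops a trampoline block that has drifted out of range of the current code segment). The helper name below is hypothetical and only illustrates the range check.

#include <climits>
#include <cstdint>

// True when `target` can be reached from code placed at `code_start` with a
// rel32 displacement; otherwise a trampoline allocated near the code is needed.
static bool reachable_with_rel32(std::uint64_t code_start, std::uint64_t target)
{
	const std::int64_t diff = static_cast<std::int64_t>(target - code_start);
	return diff >= INT_MIN && diff <= INT_MAX;
}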