Rewrite the way LLVM JIT does memory allocation to allow use of more than one contiguous segment (#6771)

2024-11-16 23:17:29 +00:00 · 2019-10-28 23:01:07 +01:00 · 2019-10-28 23:01:07 +01:00 · b49b4c8096
commit b49b4c8096
parent 42fc698186
1 changed files with 284 additions and 59 deletions
--- a/Utilities/JIT.cpp
+++ b/Utilities/JIT.cpp
@ -1,4 +1,4 @@
-#include "types.h"
+#include "types.h"
 #include "JIT.h"
 #include "StrFmt.h"
 #include "File.h"
@ -269,41 +269,237 @@ void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code
 #include <sys/mman.h>
 #endif
-// Memory manager mutex
+class LLVMSegmentAllocator
 shared_mutex s_mutex;
 // Size of virtual memory area reserved: 512 MB
 static const u64 s_memory_size = 0x20000000;
 // Try to reserve a portion of virtual memory in the first 2 GB address space beforehand, if possible.
 static void* const s_memory = []() -> void*
 {
-	llvm::InitializeNativeTarget();
+public:
-	llvm::InitializeNativeTargetAsmPrinter();
+	// Size of virtual memory area reserved: default 512MB
-	llvm::InitializeNativeTargetAsmParser();
+	static constexpr u32 DEFAULT_SEGMENT_SIZE = 0x20000000;
 	LLVMLinkInMCJIT();
-#ifdef MAP_32BIT
+	LLVMSegmentAllocator()
 	auto ptr = ::mmap(nullptr, s_memory_size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
 	if (ptr != MAP_FAILED)
 		return ptr;
 #else
 	for (u64 addr = 0x10000000; addr <= 0x80000000 - s_memory_size; addr += 0x1000000)
 	{
-		if (auto ptr = utils::memory_reserve(s_memory_size, (void*)addr))
+		llvm::InitializeNativeTarget();
 		llvm::InitializeNativeTargetAsmPrinter();
 		llvm::InitializeNativeTargetAsmParser();
 		LLVMLinkInMCJIT();
 		// Try to reserve as much virtual memory in the first 2 GB address space beforehand, if possible.
 		Segment found_segs[16];
 		u32 num_segs = 0;
 #ifdef MAP_32BIT
 		u64 max_size = 0x80000000u;
 		while (num_segs < 16)
 		{
-			return ptr;
+			auto ptr = ::mmap(nullptr, max_size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
 			if (ptr != MAP_FAILED)
 				found_segs[num_segs++] = Segment(ptr, u32(max_size));
 			else if (max_size > 0x1000000)
 				max_size -= 0x1000000;
 			else
 				break;
 		}
 #else
 		u64 start_addr = 0x10000000;
 		while (num_segs < 16)
 		{
 			u64 max_addr = 0;
 			u64 max_size = 0x1000000;
 			for (u64 addr = start_addr; addr <= (0x80000000u - max_size); addr += 0x1000000)
 			{
 				for (auto curr_size = max_size; (0x80000000u - curr_size) >= addr; curr_size += 0x1000000)
 				{
 					if (auto ptr = utils::memory_reserve(curr_size, (void*)addr))
 					{
 						if (max_addr == 0 || max_size < curr_size)
 						{
 							max_addr = addr;
 							max_size = curr_size;
 						}
 						utils::memory_release(ptr, curr_size);
 					}
 					else
 						break;
 				}
 			}
 			if (max_addr == 0)
 				break;
 			if (auto ptr = utils::memory_reserve(max_size, (void*)max_addr))
 				found_segs[num_segs++] = Segment(ptr, u32(max_size));
 			start_addr = max_addr + max_size;
 		}
 #endif
 		if (num_segs)
 		{
 			if (num_segs > 1)
 			{
 				m_segs.resize(num_segs);
 				for (u32 i = 0; i < num_segs; i++)
 					m_segs[i] = found_segs[i];
 			}
 			else
 				m_curr = found_segs[0];
 			return;
 		}
 		if (auto ptr = utils::memory_reserve(DEFAULT_SEGMENT_SIZE))
 		{
 			m_curr.addr = (u8*)ptr;
 			m_curr.size = DEFAULT_SEGMENT_SIZE;
 			m_curr.used = 0;
 		}
 	}
 #endif
-	return utils::memory_reserve(s_memory_size);
 	// Hands out `size` bytes from the current segment (m_curr).
 	// If the current segment has fewer than `size` bytes remaining, reserve() is
 	// asked to make m_curr a segment with enough room, and the bytes are taken from that one.
 	// Returns nullptr when reserve() fails. The returned pointer is simply the
 	// segment's previous fill position; no alignment is applied here.
+	void* allocate(u32 size)
-}();
+	{
 		if (m_curr.remaining() >= size)
 			return m_curr.advance(size);
-static void* s_next = s_memory;
+		if (reserve(size))
 			return m_curr.advance(size);
 		return nullptr;
 	}
 	// Makes m_curr refer to a segment that still has at least `size` free bytes.
 	// Existing segments are searched best-fit (least free space that still fits);
 	// if none fits, a new segment is reserved via utils::memory_reserve.
 	// Returns false only when that reservation fails; a zero-sized request always succeeds.
 	bool reserve(u32 size)
 	{
 		// An empty request never needs a segment switch
 		if (!size)
 			return true;
 		// Write the current segment's fill level back into m_segs before searching
 		store_curr();
 		constexpr u32 not_found = UINT_MAX;
 		u32 chosen = not_found;
 		const u32 count = (u32)m_segs.size();
 		for (u32 idx = 0; idx < count; ++idx)
 		{
 			const auto free_bytes = m_segs[idx].remaining();
 			// Accept the first fitting segment, then only strictly tighter fits
 			if (free_bytes >= size && (chosen == not_found || free_bytes < m_segs[chosen].remaining()))
 				chosen = idx;
 		}
 		if (chosen == not_found)
 		{
 			// Nothing fits: reserve a default-sized segment, or a page-aligned larger one for oversized requests
 			const auto new_size = (size > DEFAULT_SEGMENT_SIZE) ? ::align(size+4096, 4096) : DEFAULT_SEGMENT_SIZE;
 			auto mem = utils::memory_reserve(new_size);
 			if (!mem)
 				return false;
 			chosen = (u32)m_segs.size();
 			m_segs.emplace_back(mem, new_size);
 		}
 		// Switch only when the chosen segment differs from the one already in use
 		const auto& target = m_segs[chosen];
 		if (target.addr != m_curr.addr)
 			m_curr = target;
 		return true;
 	}
 	std::pair<u64, u32> current_segment() const { return std::make_pair(u64(m_curr.addr), m_curr.size); }
 	// Looks up the segment whose [start, start + size) range contains `addr`.
 	// Returns (start address, size) of that segment, or (0, 0) if no known segment contains it.
 	std::pair<u64, u32> find_segment(u64 addr) const
 	{
 		const auto contains = [addr](const Segment& seg)
 		{
 			const u64 start = u64(seg.addr);
 			return seg.addr != nullptr && addr >= start && addr < start + seg.size;
 		};
 		// The active segment is not necessarily listed in m_segs (a lone segment lives
 		// only in m_curr until store_curr() runs), so it has to be checked explicitly
 		if (contains(m_curr))
 			return std::make_pair(u64(m_curr.addr), m_curr.size);
 		for (const auto& seg : m_segs)
 		{
 			if (contains(seg))
 				return std::make_pair(u64(seg.addr), seg.size);
 		}
 		return std::pair<u64, u32>(0, 0);
 	}
 	// Returns the allocator to its empty state.
 	// Segments that end above the 2 GB mark are released and forgotten;
 	// all remaining segments are decommitted and marked as unused so they can be reused.
 	void reset()
 	{
 		if (m_segs.empty())
 		{
 			// Only the current segment exists (if any): wipe it in place
 			if (m_curr.addr != nullptr)
 			{
 				utils::memory_decommit(m_curr.addr, m_curr.size);
 				m_curr.used = 0;
 			}
 			return;
 		}
 		// Make sure the current segment is tracked in m_segs, then detach from it
 		if (store_curr())
 			m_curr = Segment();
 		// Move the segments to keep to the front. stable_partition is used rather than
 		// remove_if because remove_if leaves the tail elements in an unspecified state,
 		// and the tail is read below to release the memory it describes.
 		const auto released_it = std::stable_partition(m_segs.begin(), m_segs.end(), [](const Segment& seg) { return !(u64(seg.addr + seg.size) > 0x80000000u); });
 		for (auto it = released_it; it != m_segs.end(); ++it)
 			utils::memory_release(it->addr, it->size);
 		m_segs.erase(released_it, m_segs.end());
 		for (auto& seg : m_segs)
 		{
 			utils::memory_decommit(seg.addr, seg.size);
 			seg.used = 0;
 		}
 	}
 private:
 	// Synchronizes m_curr into m_segs: updates the `used` counter of the entry with the
 	// same base address, or appends m_curr if no such entry exists.
 	// Returns false (and does nothing) when there is no current segment.
 	bool store_curr()
 	{
 		if (m_curr.addr == nullptr)
 			return false;
 		for (auto& seg : m_segs)
 		{
 			if (seg.addr == m_curr.addr)
 			{
 				// Already tracked: only the fill level can have changed
 				seg.used = m_curr.used;
 				return true;
 			}
 		}
 		m_segs.push_back(m_curr);
 		return true;
 	}
 	struct Segment
 	{
 		Segment() {}
 		Segment(void* addr, u32 size) : addr((u8*)addr), size(size) {}
 		u8* addr = nullptr;
 		u32 size = 0;
 		u32 used = 0;
 		u32 remaining() const
 		{
 			if (size > used)
 				return size - used;
 			return 0;
 		}
 		void* advance(u32 offset)
 		{
 			const auto prev_used = used;
 			used += offset;
 			return &addr[prev_used];
 		}
 	};
 	Segment m_curr;
 	std::vector<Segment> m_segs;	
 };
 // Memory manager mutex
 static shared_mutex s_mutex;
 // LLVM Memory allocator
 static LLVMSegmentAllocator s_alloc;
 #ifdef _WIN32
-static std::deque<std::vector<RUNTIME_FUNCTION>> s_unwater;
+static std::deque<std::pair<u64, std::vector<RUNTIME_FUNCTION>>> s_unwater;
 static std::vector<std::vector<RUNTIME_FUNCTION>> s_unwind; // .pdata
 #else
 static std::deque<std::pair<u8*, std::size_t>> s_unfire;
@ -331,9 +527,7 @@ extern void jit_finalize()
 	s_unfire.clear();
 #endif
-	utils::memory_decommit(s_memory, s_memory_size);
+	s_alloc.reset();
 	s_next = s_memory;
 }
 // Helper class
@ -376,17 +570,25 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		}
 		// Verify address for small code model
-		if ((u64)s_memory > 0x80000000 - s_memory_size ? (u64)addr - (u64)s_memory >= s_memory_size : addr >= 0x80000000)
+		const u64 code_start = u64(m_code_addr);
 		const s64 addr_diff = addr - code_start;
 		if (addr_diff < INT_MIN || addr_diff > INT_MAX)
 		{
 			// Lock memory manager
 			std::lock_guard lock(s_mutex);
 			// Allocate memory for trampolines
 			if (m_tramps)
 			{
 				const s64 tramps_diff = u64(m_tramps) - code_start;
 				if (tramps_diff < INT_MIN || tramps_diff > INT_MAX) 
 					m_tramps = nullptr; //previously allocated trampoline section too far away now
 			}
 			if (!m_tramps)
 			{
-				m_tramps = reinterpret_cast<decltype(m_tramps)>(s_next);
+				m_tramps = reinterpret_cast<decltype(m_tramps)>(s_alloc.allocate(4096));
-				utils::memory_commit(s_next, 4096, utils::protection::wx);
+				utils::memory_commit(m_tramps, 4096, utils::protection::wx);
 				s_next = (u8*)((u64)s_next + 4096);
 			}
 			// Create a trampoline
@ -412,36 +614,57 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		return {addr, llvm::JITSymbolFlags::Exported};
 	}
-	u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
+	bool needsToReserveAllocationSpace() override { return true; }
 	// Receives the total code / read-only data / read-write data sizes and alignments
 	// for an upcoming load and asks the segment allocator, under s_mutex, to select a
 	// segment with room for their sum.
 	// Each size is truncated to u32 and rounded up to min(4096, its alignment).
 	// The result of s_alloc.reserve() is not checked here; a failure would only
 	// surface later when a section allocation returns nullptr.
 	// NOTE(review): allocateCodeSection/allocateDataSection round every section up to
 	// 4096 bytes, so the total consumed can exceed the sum reserved here - confirm
 	// whether a module can end up split across segments because of that.
 	void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) override
 	{
 		const u32 wanted_code_size = ::align(u32(CodeSize), std::min(4096u, CodeAlign));
 		const u32 wanted_rodata_size = ::align(u32(RODataSize), std::min(4096u, RODataAlign));
 		const u32 wanted_rwdata_size = ::align(u32(RWDataSize), std::min(4096u, RWDataAlign));
 		// Lock memory manager
 		std::lock_guard lock(s_mutex);
-		// Simple allocation
+		// Setup segment for current module if needed
-		const u64 next = ::align((u64)s_next + size, 4096);
+		s_alloc.reserve(wanted_code_size + wanted_rodata_size + wanted_rwdata_size);
 	}
-		if (next > (u64)s_memory + s_memory_size)
+	u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
 	{
 		void* ptr = nullptr;
 		const u32 wanted_size = ::align(u32(size), 4096);
 		{
 			// Lock memory manager
 			std::lock_guard lock(s_mutex);
 			// Simple allocation
 			ptr = s_alloc.allocate(wanted_size);
 		}
 		if (ptr == nullptr)
 		{
 			LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
 			return nullptr;
 		}
 		utils::memory_commit(ptr, size, utils::protection::wx);
 		m_code_addr = (u8*)ptr;
-		utils::memory_commit(s_next, size, utils::protection::wx);
+		LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), ptr, size, align);
-		m_code_addr = (u8*)s_next;
+		return (u8*)ptr;
 		LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x)", sec_id, sec_name.data(), s_next, size, align);
 		return (u8*)std::exchange(s_next, (void*)next);
 	}
 	u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
 	{
-		// Lock memory manager
+		void* ptr = nullptr;
-		std::lock_guard lock(s_mutex);
+		const u32 wanted_size = ::align(u32(size), 4096);
 		{
 			// Lock memory manager
 			std::lock_guard lock(s_mutex);
-		// Simple allocation
+			// Simple allocation
-		const u64 next = ::align((u64)s_next + size, 4096);
+			ptr = s_alloc.allocate(wanted_size);
 		}
-		if (next > (u64)s_memory + s_memory_size)
+		if (ptr == nullptr)
 		{
 			LOG_FATAL(GENERAL, "LLVM: Out of memory (size=0x%llx, aligned 0x%x)", size, align);
 			return nullptr;
@ -451,10 +674,10 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		{
 		}
-		utils::memory_commit(s_next, size);
+		utils::memory_commit(ptr, size);
-		LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), s_next, size, align, is_ro ? "ro" : "rw");
+		LOG_NOTICE(GENERAL, "LLVM: Data section %u '%s' allocated -> %p (size=0x%llx, aligned 0x%x, %s)", sec_id, sec_name.data(), ptr, size, align, is_ro ? "ro" : "rw");
-		return (u8*)std::exchange(s_next, (void*)next);
+		return (u8*)ptr;
 	}
 	bool finalizeMemory(std::string* = nullptr) override
@ -479,21 +702,22 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 #ifdef _WIN32
 		// Lock memory manager
 		std::lock_guard lock(s_mutex);
 		// Use s_memory as a BASE, compute the difference
 		const u64 unwind_diff = (u64)addr - (u64)s_memory;
 		// Fix RUNTIME_FUNCTION records (.pdata section)
-		auto pdata = std::move(s_unwater.front());
+		decltype(s_unwater)::value_type pdata_entry = std::move(s_unwater.front());
 		s_unwater.pop_front();
 		// Use given memory segment as a BASE, compute the difference
 		const u64 segment_start = pdata_entry.first;
 		const u64 unwind_diff = (u64)addr - segment_start;
 		auto& pdata = pdata_entry.second;
 		for (auto& rf : pdata)
 		{
 			rf.UnwindData += static_cast<DWORD>(unwind_diff);
 		}
 		// Register .xdata UNWIND_INFO structs
-		if (!RtlAddFunctionTable(pdata.data(), (DWORD)pdata.size(), (u64)s_memory))
+		if (!RtlAddFunctionTable(pdata.data(), (DWORD)pdata.size(), segment_start))
 		{
 			LOG_ERROR(GENERAL, "RtlAddFunctionTable() failed! Error %u", GetLastError());
 		}
@ -635,8 +859,9 @@ struct EventListener : llvm::JITEventListener
 				// Lock memory manager
 				std::lock_guard lock(s_mutex);
-				// Use s_memory as a BASE, compute the difference
+				// Use current memory segment as a BASE, compute the difference
-				const u64 code_diff = (u64)m_mem.m_code_addr - (u64)s_memory;
+				const u64 segment_start = s_alloc.current_segment().first;
 				const u64 code_diff = u64(m_mem.m_code_addr) - segment_start;
 				// Fix RUNTIME_FUNCTION records (.pdata section)
 				for (auto& rf : rfs)
@ -645,7 +870,7 @@ struct EventListener : llvm::JITEventListener
 					rf.EndAddress   += static_cast<DWORD>(code_diff);
 				}
-				s_unwater.emplace_back(std::move(rfs));
+				s_unwater.emplace_back(segment_start, std::move(rfs));
 			}
 		}
 #endif