mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-16 23:17:29 +00:00
PPU/LLVM: Use a big table instead of an unordered_map to get compilation result.
The hashmap overhead may be really big in some case.
This commit is contained in:
parent
942f26509b
commit
5fe3ea6440
@ -5,6 +5,7 @@
|
||||
#include "Emu/Cell/PPUDisAsm.h"
|
||||
#include "Emu/Cell/PPULLVMRecompiler.h"
|
||||
#include "Emu/Memory/Memory.h"
|
||||
#include "Utilities/VirtualMemory.h"
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push, 0)
|
||||
#endif
|
||||
@ -36,6 +37,11 @@ using namespace ppu_recompiler_llvm;
|
||||
#error "ID Manager cannot be used in this module"
|
||||
#endif
|
||||
|
||||
// PS3 can address 32 bits aligned on 4 bytes boundaries : 2^30 pointers
|
||||
#define VIRTUAL_INSTRUCTION_COUNT 0x40000000
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
u64 Compiler::s_rotate_mask[64][64];
|
||||
bool Compiler::s_rotate_mask_inited = false;
|
||||
|
||||
@ -289,12 +295,20 @@ RecompilationEngine::RecompilationEngine()
|
||||
, m_currentId(0)
|
||||
, m_last_cache_clear_time(std::chrono::high_resolution_clock::now())
|
||||
, m_compiler(*this, CPUHybridDecoderRecompiler::ExecuteFunction, CPUHybridDecoderRecompiler::ExecuteTillReturn, CPUHybridDecoderRecompiler::PollStatus) {
|
||||
|
||||
FunctionCache = (Executable *)memory_helper::reserve_memory(VIRTUAL_INSTRUCTION_COUNT * sizeof(Executable));
|
||||
// Each char can store 8 page status
|
||||
FunctionCachePagesCommited = (char *)malloc(VIRTUAL_INSTRUCTION_COUNT / (8 * PAGE_SIZE));
|
||||
memset(FunctionCachePagesCommited, 0, VIRTUAL_INSTRUCTION_COUNT / (8 * PAGE_SIZE));
|
||||
|
||||
m_compiler.RunAllTests();
|
||||
}
|
||||
|
||||
RecompilationEngine::~RecompilationEngine() {
|
||||
m_address_to_function.clear();
|
||||
join();
|
||||
memory_helper::free_reserved_memory(FunctionCache, VIRTUAL_INSTRUCTION_COUNT * sizeof(Executable));
|
||||
free(FunctionCachePagesCommited);
|
||||
}
|
||||
|
||||
Executable executeFunc;
|
||||
@ -312,18 +326,39 @@ std::pair<std::mutex, std::atomic<int> >* RecompilationEngine::GetMutexAndCounte
|
||||
return &(It->second);
|
||||
}
|
||||
|
||||
const Executable *RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address)
|
||||
bool RecompilationEngine::isAddressCommited(u32 address) const
|
||||
{
|
||||
size_t offset = address * sizeof(Executable);
|
||||
size_t page = offset / 4096;
|
||||
// Since bool is stored in char, the char index is page / 8 (or page >> 3)
|
||||
// and we shr the value with the remaining bits (page & 7)
|
||||
return (FunctionCachePagesCommited[page >> 3] >> (page & 7)) & 1;
|
||||
}
|
||||
|
||||
void RecompilationEngine::commitAddress(u32 address)
|
||||
{
|
||||
size_t offset = address * sizeof(Executable);
|
||||
size_t page = offset / 4096;
|
||||
memory_helper::commit_page_memory((u8*)FunctionCache + page * 4096, 4096);
|
||||
// Reverse of isAddressCommited : we set the (page & 7)th bit of (page / 8) th char
|
||||
// in the array
|
||||
FunctionCachePagesCommited[page >> 3] |= (1 << (page & 7));
|
||||
}
|
||||
|
||||
const Executable RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_address_to_function_lock);
|
||||
if (!isAddressCommited(address / 4))
|
||||
commitAddress(address / 4);
|
||||
if (!Ini.LLVMExclusionRange.GetValue())
|
||||
return FunctionCache[address / 4];
|
||||
std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(address);
|
||||
if (It == m_address_to_function.end())
|
||||
return nullptr;
|
||||
if (std::get<1>(It->second) == nullptr)
|
||||
return nullptr;
|
||||
u32 id = std::get<3>(It->second);
|
||||
if (Ini.LLVMExclusionRange.GetValue() && (id >= Ini.LLVMMinId.GetValue() && id <= Ini.LLVMMaxId.GetValue()))
|
||||
if (id >= Ini.LLVMMinId.GetValue() && id <= Ini.LLVMMaxId.GetValue())
|
||||
return nullptr;
|
||||
return &(std::get<0>(It->second));
|
||||
return std::get<0>(It->second);
|
||||
}
|
||||
|
||||
void RecompilationEngine::RemoveUnusedEntriesFromCache() {
|
||||
@ -558,6 +593,8 @@ void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_address_to_function_lock);
|
||||
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr;
|
||||
if (!isAddressCommited(block_entry.cfg.start_address / 4))
|
||||
commitAddress(block_entry.cfg.start_address / 4);
|
||||
}
|
||||
|
||||
std::unordered_map<u32, std::pair<std::mutex, std::atomic<int>> >::iterator It2 = m_address_locks.find(block_entry.cfg.start_address);
|
||||
@ -585,6 +622,7 @@ void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
|
||||
m_currentId++;
|
||||
block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize();
|
||||
block_entry.is_compiled = true;
|
||||
FunctionCache[block_entry.cfg.start_address / 4] = compileResult.first;
|
||||
}
|
||||
|
||||
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
|
||||
@ -737,11 +775,11 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
|
||||
std::lock_guard<std::mutex> lock(mut->first);
|
||||
mut->second.fetch_add(1);
|
||||
}
|
||||
const Executable *executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC);
|
||||
const Executable executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC);
|
||||
if (executable)
|
||||
{
|
||||
auto entry = ppu_state->PC;
|
||||
u32 exit = (u32)(*executable)(ppu_state, 0);
|
||||
u32 exit = (u32)executable(ppu_state, 0);
|
||||
mut->second.fetch_sub(1);
|
||||
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit);
|
||||
if (exit == 0)
|
||||
|
@ -1024,7 +1024,7 @@ namespace ppu_recompiler_llvm {
|
||||
* Get the executable for the specified address if a compiled version is
|
||||
* available, otherwise returns nullptr.
|
||||
**/
|
||||
const Executable *GetCompiledExecutableIfAvailable(u32 address);
|
||||
const Executable GetCompiledExecutableIfAvailable(u32 address);
|
||||
|
||||
/// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace.
|
||||
void NotifyTrace(ExecutionTrace * execution_trace);
|
||||
@ -1111,6 +1111,17 @@ namespace ppu_recompiler_llvm {
|
||||
|
||||
int m_currentId;
|
||||
|
||||
// Store pointer to every compiled function/block.
|
||||
// We need to map every instruction in PS3 Ram so it's a big table
|
||||
// But a lot of it won't be accessed. Fortunatly virtual memory help here...
|
||||
Executable *FunctionCache;
|
||||
|
||||
// Bitfield recording page status in FunctionCache reserved memory.
|
||||
char *FunctionCachePagesCommited;
|
||||
|
||||
bool isAddressCommited(u32) const;
|
||||
void commitAddress(u32);
|
||||
|
||||
/// (function, module containing function, times hit, id).
|
||||
typedef std::tuple<Executable, std::unique_ptr<llvm::ExecutionEngine>, u32, u32> ExecutableStorage;
|
||||
/// Address to ordinal cahce. Key is address.
|
||||
|
Loading…
Reference in New Issue
Block a user