diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index c2ce883a91..c7298c8e25 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -13,7 +13,7 @@ logs::channel cellGcmSys("cellGcmSys", logs::level::notice); extern s32 cellGcmCallback(vm::ptr context, u32 count); -extern void ppu_register_function_at(u32 addr, ppu_function_t ptr); +extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr); const u32 tiled_pitches[] = { 0x00000000, 0x00000200, 0x00000300, 0x00000400, @@ -385,7 +385,7 @@ s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSi vm::write32(gcm_info.context_addr + 0x44, 0xabadcafe); vm::write32(gcm_info.context_addr + 0x48, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback))); vm::write32(gcm_info.context_addr + 0x4c, ppu_instructions::BLR()); - ppu_register_function_at(gcm_info.context_addr + 0x48, BIND_FUNC(cellGcmCallback)); + ppu_register_function_at(gcm_info.context_addr + 0x48, 8, BIND_FUNC(cellGcmCallback)); vm::_ref(gcm_info.context_addr) = current_context; context->set(gcm_info.context_addr); diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index b53a0787b8..fc88722a35 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -115,6 +115,7 @@ cfg::set_entry g_cfg_load_libs(cfg::root.core, "Load libraries"); extern std::string ppu_get_function_name(const std::string& module, u32 fnid); extern std::string ppu_get_variable_name(const std::string& module, u32 vnid); +extern void ppu_register_range(u32 addr, u32 size); extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32); @@ -760,10 +761,16 @@ std::shared_ptr ppu_load_prx(const ppu_prx_object& elf) fmt::throw_exception("vm::alloc() failed (size=0x%x)", mem_size); } - // Copy data + // Copy segment data std::memcpy(vm::base(addr), prog.bin.data(), file_size); LOG_WARNING(LOADER, "**** Loaded to 0x%x (size=0x%x)", addr, mem_size); + // 
Initialize executable code if necessary + if (prog.p_flags & 0x1) + { + ppu_register_range(addr, mem_size); + } + segments.emplace_back(std::make_pair(addr, mem_size)); } @@ -975,8 +982,15 @@ void ppu_load_exec(const ppu_exec_object& elf) if (!vm::falloc(addr, size, vm::main)) fmt::throw_exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size); + // Copy segment data std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size()); + // Initialize executable code if necessary + if (prog.p_flags & 0x1) + { + ppu_register_range(addr, size); + } + segments.emplace_back(std::make_pair(addr, size)); } } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 6feb8e81b4..99dae94713 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -73,12 +73,48 @@ extern void ppu_execute_function(ppu_thread& ppu, u32 index); const auto s_ppu_compiled = static_cast(memory_helper::reserve_memory(0x100000000)); -extern void ppu_register_function_at(u32 addr, ppu_function_t ptr) +// Get interpreter cache value: the 32-bit handler value for the instruction stored at addr, looked up in the selected (precise or fast) interpreter opcode table; throws "Invalid PPU decoder" for any other decoder setting +static u32 ppu_cache(u32 addr) { + // Select opcode table + const auto& table = *( + g_cfg_ppu_decoder.get() == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() : + g_cfg_ppu_decoder.get() == ppu_decoder_type::fast ? 
&s_ppu_interpreter_fast.get_table() : + (fmt::throw_exception("Invalid PPU decoder"), nullptr)); + + return ::narrow(reinterpret_cast(table[ppu_decode(vm::read32(addr))])); +} + +extern void ppu_register_range(u32 addr, u32 size) +{ + // Register executable range at [addr, addr + size): commit the matching part of s_ppu_compiled and clear one entry per whole 4-byte instruction + memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, size); + + while (size >= 4) + { + // TODO + s_ppu_compiled[addr / 4] = 0; + addr += 4; + size -= 4; + } +} + +extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr) +{ + ppu_register_range(addr, size); + if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm) { - memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(s_ppu_compiled[0])); - s_ppu_compiled[addr / 4] = (u32)(std::uintptr_t)ptr; + s_ppu_compiled[addr / 4] = ::narrow(reinterpret_cast(ptr)); + return; + } + + // Initialize interpreter cache: one ppu_cache() entry per whole 4-byte instruction (ptr is not used on this path); a trailing partial word is skipped so the unsigned size cannot wrap past zero + while (size >= 4) + { + s_ppu_compiled[addr / 4] = ppu_cache(addr); + addr += 4; + size -= 4; } } @@ -197,15 +233,12 @@ void ppu_thread::exec_task() } const auto base = vm::_ptr(0); - - // Select opcode table - const auto& table = *( - g_cfg_ppu_decoder.get() == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() : - g_cfg_ppu_decoder.get() == ppu_decoder_type::fast ? 
&s_ppu_interpreter_fast.get_table() : - (fmt::throw_exception("Invalid PPU decoder"), nullptr)); + const auto cache = reinterpret_cast(s_ppu_compiled); + const auto bswap4 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); v128 _op; - decltype(&ppu_interpreter::UNK) func0, func1, func2, func3; + using func_t = decltype(&ppu_interpreter::UNK); + func_t func0, func1, func2, func3, func4, func5; while (true) { @@ -215,42 +248,62 @@ void ppu_thread::exec_task() // Decode single instruction (may be step) const u32 op = *reinterpret_cast*>(base + cia); - if (table[ppu_decode(op)](*this, {op})) { cia += 4; } + if (reinterpret_cast((std::uintptr_t)s_ppu_compiled[cia / 4])(*this, {op})) { cia += 4; } + continue; + } + + if (cia % 16) + { + // Unaligned + const u32 op = *reinterpret_cast*>(base + cia); + if (reinterpret_cast((std::uintptr_t)s_ppu_compiled[cia / 4])(*this, {op})) { cia += 4; } continue; } // Reinitialize { - const auto _ops = _mm_shuffle_epi8(_mm_lddqu_si128(reinterpret_cast(base + cia)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); - _op.vi = _ops; - const v128 _i = v128::fromV(_mm_and_si128(_mm_or_si128(_mm_slli_epi32(_op.vi, 6), _mm_srli_epi32(_op.vi, 26)), _mm_set1_epi32(0x1ffff))); - func0 = table[_i._u32[0]]; - func1 = table[_i._u32[1]]; - func2 = table[_i._u32[2]]; - func3 = table[_i._u32[3]]; + const v128 x = v128::fromV(_mm_load_si128(reinterpret_cast(cache + cia))); + func0 = reinterpret_cast((std::uintptr_t)x._u32[0]); + func1 = reinterpret_cast((std::uintptr_t)x._u32[1]); + func2 = reinterpret_cast((std::uintptr_t)x._u32[2]); + func3 = reinterpret_cast((std::uintptr_t)x._u32[3]); + _op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast(base + cia)), bswap4); } while (LIKELY(func0(*this, {_op._u32[0]}))) { - if (cia += 4, LIKELY(func1(*this, {_op._u32[1]}))) + cia += 4; + + if (LIKELY(func1(*this, {_op._u32[1]}))) { - if (cia += 4, LIKELY(func2(*this, {_op._u32[2]}))) + cia += 4; + + const v128 x = 
v128::fromV(_mm_load_si128(reinterpret_cast(cache + cia + 8))); + func0 = reinterpret_cast((std::uintptr_t)x._u32[0]); + func1 = reinterpret_cast((std::uintptr_t)x._u32[1]); + func4 = reinterpret_cast((std::uintptr_t)x._u32[2]); + func5 = reinterpret_cast((std::uintptr_t)x._u32[3]); + + if (LIKELY(func2(*this, {_op._u32[2]}))) { cia += 4; - func0 = func3; - const auto _ops = _mm_shuffle_epi8(_mm_lddqu_si128(reinterpret_cast(base + cia + 4)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); - _op.vi = _mm_alignr_epi8(_ops, _op.vi, 12); - const v128 _i = v128::fromV(_mm_and_si128(_mm_or_si128(_mm_slli_epi32(_op.vi, 6), _mm_srli_epi32(_op.vi, 26)), _mm_set1_epi32(0x1ffff))); - func1 = table[_i._u32[1]]; - func2 = table[_i._u32[2]]; - func3 = table[_i._u32[3]]; - - if (UNLIKELY(test(state))) + if (LIKELY(func3(*this, {_op._u32[3]}))) { - break; + cia += 4; + + func2 = func4; + func3 = func5; + + if (UNLIKELY(test(state))) + { + break; + } + + _op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast(base + cia)), bswap4); + continue; } - continue; + break; } break; } @@ -532,8 +585,14 @@ static void ppu_initialize() Emu.SetCPUThreadStop(ppu_thr_stop_data.addr()); ppu_thr_stop_data[0] = ppu_instructions::HACK(1); ppu_thr_stop_data[1] = ppu_instructions::BLR(); + ppu_register_function_at(ppu_thr_stop_data.addr(), 8, nullptr); } - + + for (const auto& func : *_funcs) + { + ppu_register_function_at(func.addr, func.size, nullptr); + } + return; } @@ -783,17 +842,15 @@ static void ppu_initialize() return; } - memory_helper::free_reserved_memory(s_ppu_compiled, 0x100000000); // TODO - // Get and install function addresses for (const auto& info : *_funcs) { if (info.size) { const std::uintptr_t link = jit->get(fmt::format("__0x%x", info.addr)); - ppu_register_function_at(info.addr, (ppu_function_t)link); + s_ppu_compiled[info.addr / 4] = ::narrow(link); - LOG_NOTICE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, 
info.size, info.toc, info.attr); + LOG_TRACE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, info.size, info.toc, info.attr); } }