mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-09 18:40:27 +00:00
PPU interpreter improved
This commit is contained in:
parent
257b9a2015
commit
8262d56574
@ -13,7 +13,7 @@
|
||||
logs::channel cellGcmSys("cellGcmSys", logs::level::notice);
|
||||
|
||||
extern s32 cellGcmCallback(vm::ptr<CellGcmContextData> context, u32 count);
|
||||
extern void ppu_register_function_at(u32 addr, ppu_function_t ptr);
|
||||
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr);
|
||||
|
||||
const u32 tiled_pitches[] = {
|
||||
0x00000000, 0x00000200, 0x00000300, 0x00000400,
|
||||
@ -385,7 +385,7 @@ s32 _cellGcmInitBody(vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSi
|
||||
vm::write32(gcm_info.context_addr + 0x44, 0xabadcafe);
|
||||
vm::write32(gcm_info.context_addr + 0x48, ppu_instructions::HACK(FIND_FUNC(cellGcmCallback)));
|
||||
vm::write32(gcm_info.context_addr + 0x4c, ppu_instructions::BLR());
|
||||
ppu_register_function_at(gcm_info.context_addr + 0x48, BIND_FUNC(cellGcmCallback));
|
||||
ppu_register_function_at(gcm_info.context_addr + 0x48, 8, BIND_FUNC(cellGcmCallback));
|
||||
|
||||
vm::_ref<CellGcmContextData>(gcm_info.context_addr) = current_context;
|
||||
context->set(gcm_info.context_addr);
|
||||
|
@ -115,6 +115,7 @@ cfg::set_entry g_cfg_load_libs(cfg::root.core, "Load libraries");
|
||||
|
||||
extern std::string ppu_get_function_name(const std::string& module, u32 fnid);
|
||||
extern std::string ppu_get_variable_name(const std::string& module, u32 vnid);
|
||||
extern void ppu_register_range(u32 addr, u32 size);
|
||||
|
||||
extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32);
|
||||
|
||||
@ -760,10 +761,16 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf)
|
||||
fmt::throw_exception("vm::alloc() failed (size=0x%x)", mem_size);
|
||||
}
|
||||
|
||||
// Copy data
|
||||
// Copy segment data
|
||||
std::memcpy(vm::base(addr), prog.bin.data(), file_size);
|
||||
LOG_WARNING(LOADER, "**** Loaded to 0x%x (size=0x%x)", addr, mem_size);
|
||||
|
||||
// Initialize executable code if necessary
|
||||
if (prog.p_flags & 0x1)
|
||||
{
|
||||
ppu_register_range(addr, mem_size);
|
||||
}
|
||||
|
||||
segments.emplace_back(std::make_pair(addr, mem_size));
|
||||
}
|
||||
|
||||
@ -975,8 +982,15 @@ void ppu_load_exec(const ppu_exec_object& elf)
|
||||
if (!vm::falloc(addr, size, vm::main))
|
||||
fmt::throw_exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
|
||||
|
||||
// Copy segment data
|
||||
std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size());
|
||||
|
||||
// Initialize executable code if necessary
|
||||
if (prog.p_flags & 0x1)
|
||||
{
|
||||
ppu_register_range(addr, size);
|
||||
}
|
||||
|
||||
segments.emplace_back(std::make_pair(addr, size));
|
||||
}
|
||||
}
|
||||
|
@ -73,12 +73,48 @@ extern void ppu_execute_function(ppu_thread& ppu, u32 index);
|
||||
|
||||
const auto s_ppu_compiled = static_cast<u32*>(memory_helper::reserve_memory(0x100000000));
|
||||
|
||||
extern void ppu_register_function_at(u32 addr, ppu_function_t ptr)
|
||||
// Get interpreter cache value
|
||||
static u32 ppu_cache(u32 addr)
|
||||
{
|
||||
// Select opcode table
|
||||
const auto& table = *(
|
||||
g_cfg_ppu_decoder.get() == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() :
|
||||
g_cfg_ppu_decoder.get() == ppu_decoder_type::fast ? &s_ppu_interpreter_fast.get_table() :
|
||||
(fmt::throw_exception<std::logic_error>("Invalid PPU decoder"), nullptr));
|
||||
|
||||
return ::narrow<u32>(reinterpret_cast<std::uintptr_t>(table[ppu_decode(vm::read32(addr))]));
|
||||
}
|
||||
|
||||
// Register the executable guest range starting at `addr` and spanning `size` bytes:
// commit the corresponding part of the s_ppu_compiled table and reset one 32-bit
// table entry per instruction word of the range.
extern void ppu_register_range(u32 addr, u32 size)
{
	// Commit the table memory backing this range
	memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, size);

	// Walk the range one 32-bit word at a time. The word count is computed up front
	// (a trailing partial word still gets its entry reset) instead of subtracting 4
	// from `size` until it reaches zero: that countdown wrapped around and never
	// terminated properly when `size` was not a multiple of 4.
	for (u32 words = size / 4 + (size % 4 ? 1 : 0); words; words--, addr += 4)
	{
		// TODO
		s_ppu_compiled[addr / 4] = 0;
	}
}
|
||||
|
||||
// Register a function occupying `size` bytes at guest address `addr`.
// With the LLVM decoder, `ptr` is stored (narrowed to u32) in the table entry for `addr`.
// Otherwise `ptr` is not used and every whole instruction word of the range gets its
// interpreter cache entry from ppu_cache().
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr)
{
	// Commit and reset the table entries covering the function body
	ppu_register_range(addr, size);

	if (g_cfg_ppu_decoder.get() == ppu_decoder_type::llvm)
	{
		// Commit the entry slot itself as well, so the store below does not depend on
		// `size` covering at least one word
		memory_helper::commit_page_memory(s_ppu_compiled + addr / 4, sizeof(s_ppu_compiled[0]));

		// Single store through ::narrow; the former unchecked C-style-cast store to the
		// same slot was immediately overwritten by this one and has been removed
		s_ppu_compiled[addr / 4] = ::narrow<u32>(reinterpret_cast<std::uintptr_t>(ptr));
		return;
	}

	// Initialize interpreter cache.
	// Only whole 32-bit instruction words are decoded. Counting words instead of
	// subtracting 4 from `size` until it reaches zero keeps the loop finite when `size`
	// is not a multiple of 4 (the unsigned countdown used to wrap around).
	for (u32 words = size / 4; words; words--, addr += 4)
	{
		s_ppu_compiled[addr / 4] = ppu_cache(addr);
	}
}
|
||||
|
||||
@ -197,15 +233,12 @@ void ppu_thread::exec_task()
|
||||
}
|
||||
|
||||
const auto base = vm::_ptr<const u8>(0);
|
||||
|
||||
// Select opcode table
|
||||
const auto& table = *(
|
||||
g_cfg_ppu_decoder.get() == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() :
|
||||
g_cfg_ppu_decoder.get() == ppu_decoder_type::fast ? &s_ppu_interpreter_fast.get_table() :
|
||||
(fmt::throw_exception<std::logic_error>("Invalid PPU decoder"), nullptr));
|
||||
const auto cache = reinterpret_cast<const u8*>(s_ppu_compiled);
|
||||
const auto bswap4 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
|
||||
|
||||
v128 _op;
|
||||
decltype(&ppu_interpreter::UNK) func0, func1, func2, func3;
|
||||
using func_t = decltype(&ppu_interpreter::UNK);
|
||||
func_t func0, func1, func2, func3, func4, func5;
|
||||
|
||||
while (true)
|
||||
{
|
||||
@ -215,42 +248,62 @@ void ppu_thread::exec_task()
|
||||
|
||||
// Decode single instruction (may be step)
|
||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + cia);
|
||||
if (table[ppu_decode(op)](*this, {op})) { cia += 4; }
|
||||
if (reinterpret_cast<func_t>((std::uintptr_t)s_ppu_compiled[cia / 4])(*this, {op})) { cia += 4; }
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cia % 16)
|
||||
{
|
||||
// Unaligned
|
||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + cia);
|
||||
if (reinterpret_cast<func_t>((std::uintptr_t)s_ppu_compiled[cia / 4])(*this, {op})) { cia += 4; }
|
||||
continue;
|
||||
}
|
||||
|
||||
// Reinitialize
|
||||
{
|
||||
const auto _ops = _mm_shuffle_epi8(_mm_lddqu_si128(reinterpret_cast<const __m128i*>(base + cia)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3));
|
||||
_op.vi = _ops;
|
||||
const v128 _i = v128::fromV(_mm_and_si128(_mm_or_si128(_mm_slli_epi32(_op.vi, 6), _mm_srli_epi32(_op.vi, 26)), _mm_set1_epi32(0x1ffff)));
|
||||
func0 = table[_i._u32[0]];
|
||||
func1 = table[_i._u32[1]];
|
||||
func2 = table[_i._u32[2]];
|
||||
func3 = table[_i._u32[3]];
|
||||
const v128 x = v128::fromV(_mm_load_si128(reinterpret_cast<const __m128i*>(cache + cia)));
|
||||
func0 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[0]);
|
||||
func1 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[1]);
|
||||
func2 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[2]);
|
||||
func3 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[3]);
|
||||
_op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<const __m128i*>(base + cia)), bswap4);
|
||||
}
|
||||
|
||||
while (LIKELY(func0(*this, {_op._u32[0]})))
|
||||
{
|
||||
if (cia += 4, LIKELY(func1(*this, {_op._u32[1]})))
|
||||
cia += 4;
|
||||
|
||||
if (LIKELY(func1(*this, {_op._u32[1]})))
|
||||
{
|
||||
if (cia += 4, LIKELY(func2(*this, {_op._u32[2]})))
|
||||
cia += 4;
|
||||
|
||||
const v128 x = v128::fromV(_mm_load_si128(reinterpret_cast<const __m128i*>(cache + cia + 8)));
|
||||
func0 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[0]);
|
||||
func1 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[1]);
|
||||
func4 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[2]);
|
||||
func5 = reinterpret_cast<func_t>((std::uintptr_t)x._u32[3]);
|
||||
|
||||
if (LIKELY(func2(*this, {_op._u32[2]})))
|
||||
{
|
||||
cia += 4;
|
||||
func0 = func3;
|
||||
|
||||
const auto _ops = _mm_shuffle_epi8(_mm_lddqu_si128(reinterpret_cast<const __m128i*>(base + cia + 4)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3));
|
||||
_op.vi = _mm_alignr_epi8(_ops, _op.vi, 12);
|
||||
const v128 _i = v128::fromV(_mm_and_si128(_mm_or_si128(_mm_slli_epi32(_op.vi, 6), _mm_srli_epi32(_op.vi, 26)), _mm_set1_epi32(0x1ffff)));
|
||||
func1 = table[_i._u32[1]];
|
||||
func2 = table[_i._u32[2]];
|
||||
func3 = table[_i._u32[3]];
|
||||
|
||||
if (UNLIKELY(test(state)))
|
||||
if (LIKELY(func3(*this, {_op._u32[3]})))
|
||||
{
|
||||
break;
|
||||
cia += 4;
|
||||
|
||||
func2 = func4;
|
||||
func3 = func5;
|
||||
|
||||
if (UNLIKELY(test(state)))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
_op.vi = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<const __m128i*>(base + cia)), bswap4);
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -532,8 +585,14 @@ static void ppu_initialize()
|
||||
Emu.SetCPUThreadStop(ppu_thr_stop_data.addr());
|
||||
ppu_thr_stop_data[0] = ppu_instructions::HACK(1);
|
||||
ppu_thr_stop_data[1] = ppu_instructions::BLR();
|
||||
ppu_register_function_at(ppu_thr_stop_data.addr(), 8, nullptr);
|
||||
}
|
||||
|
||||
|
||||
for (const auto& func : *_funcs)
|
||||
{
|
||||
ppu_register_function_at(func.addr, func.size, nullptr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -783,17 +842,15 @@ static void ppu_initialize()
|
||||
return;
|
||||
}
|
||||
|
||||
memory_helper::free_reserved_memory(s_ppu_compiled, 0x100000000); // TODO
|
||||
|
||||
// Get and install function addresses
|
||||
for (const auto& info : *_funcs)
|
||||
{
|
||||
if (info.size)
|
||||
{
|
||||
const std::uintptr_t link = jit->get(fmt::format("__0x%x", info.addr));
|
||||
ppu_register_function_at(info.addr, (ppu_function_t)link);
|
||||
s_ppu_compiled[info.addr / 4] = ::narrow<u32>(link);
|
||||
|
||||
LOG_NOTICE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, info.size, info.toc, info.attr);
|
||||
LOG_TRACE(PPU, "** Function __0x%x -> 0x%llx (size=0x%x, toc=0x%x, attr %#x)", info.addr, link, info.size, info.toc, info.attr);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user