mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-11 15:40:51 +00:00
SPU LLVM: LS Memory Mirrors (Optimize loads/stores)
This commit is contained in:
parent
c1a80b8146
commit
af1ceb1151
@ -173,7 +173,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
|
||||
{
|
||||
case MFC_LSA_offs:
|
||||
{
|
||||
if (value >= 0x40000)
|
||||
if (value >= SPU_LS_SIZE)
|
||||
{
|
||||
break;
|
||||
}
|
||||
@ -321,7 +321,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
|
||||
|
||||
void spu_load_exec(const spu_exec_object& elf)
|
||||
{
|
||||
auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, 0x80000, vm::spu));
|
||||
auto ls0 = vm::cast(vm::falloc(RAW_SPU_BASE_ADDR, SPU_LS_SIZE, vm::spu));
|
||||
auto spu = idm::make_ptr<named_thread<spu_thread>>("TEST_SPU", ls0, nullptr, 0, "", 0);
|
||||
|
||||
spu_thread::g_raw_spu_ctr++;
|
||||
@ -331,7 +331,7 @@ void spu_load_exec(const spu_exec_object& elf)
|
||||
{
|
||||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
|
||||
{
|
||||
std::memcpy(vm::base(spu->offset + prog.p_vaddr), prog.bin.data(), prog.p_filesz);
|
||||
std::memcpy(spu->_ptr<void>(prog.p_vaddr), prog.bin.data(), prog.p_filesz);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -272,8 +272,7 @@ DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, sp
|
||||
|
||||
// Tail call, GHC CC (second arg)
|
||||
c.mov(x86::r13, args[0]);
|
||||
c.mov(x86::ebp, x86::dword_ptr(args[0], ::offset32(&spu_thread::offset)));
|
||||
c.add(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::memory_base_addr)));
|
||||
c.mov(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::ls)));
|
||||
c.mov(x86::r12, args[2]);
|
||||
c.xor_(x86::ebx, x86::ebx);
|
||||
c.jmp(args[1]);
|
||||
@ -1138,7 +1137,7 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
|
||||
}
|
||||
|
||||
// Find function
|
||||
const auto func = spu.jit->get_runtime().find(static_cast<u32*>(vm::base(spu.offset)), spu.pc);
|
||||
const auto func = spu.jit->get_runtime().find(static_cast<u32*>(spu._ptr<void>(0)), spu.pc);
|
||||
|
||||
if (!func)
|
||||
{
|
||||
@ -7902,13 +7901,51 @@ public:
|
||||
|
||||
// STQX: store the 16-byte value of register rt to Local Storage at the address
// formed from element 3 of registers ra and rb.
//
// Fast path: when either address operand is an LLVM constant whose element 3
// (reduced modulo SPU_LS_SIZE) is 16-byte aligned, the constant is added as a
// 64-bit immediate to the masked variable operand. The sum is intentionally NOT
// masked again, so it may exceed SPU_LS_SIZE.
// NOTE(review): presumably this relies on the LS mirror mapped directly after
// the primary LS mapping (see the spu_thread constructor) - confirm.
//
// Slow path: both operands are added in 32 bits and the sum is masked to 0x3fff0.
void STQX(spu_opcode_t op)
{
	// Read each address register once; both paths below reuse these values
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// Try (constant = ra, variable = rb) first, then the swapped order
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto cv = llvm::dyn_cast<llvm::Constant>(pair.first.value))
		{
			v128 data = get_const_vector(cv, m_pos, 10000);
			data._u32[3] %= SPU_LS_SIZE;

			// Only a 16-byte aligned constant keeps the final address aligned
			// without masking the sum
			if (data._u32[3] % 0x10 == 0)
			{
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				make_store_ls(addr, get_vr<u8[16]>(op.rt));
				return;
			}
		}
	}

	// Generic path: wrap and align the 32-bit sum, then widen to 64 bits
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
|
||||
|
||||
// LQX: load 16 bytes from Local Storage into register rt, from the address
// formed from element 3 of registers ra and rb.
//
// Fast path: when either address operand is an LLVM constant whose element 3
// (reduced modulo SPU_LS_SIZE) is 16-byte aligned, the constant is added as a
// 64-bit immediate to the masked variable operand. The sum is intentionally NOT
// masked again, so it may exceed SPU_LS_SIZE.
// NOTE(review): presumably this relies on the LS mirror mapped directly after
// the primary LS mapping (see the spu_thread constructor) - confirm.
//
// Slow path: both operands are added in 32 bits and the sum is masked to 0x3fff0.
void LQX(spu_opcode_t op)
{
	// Read each address register once; both paths below reuse these values
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// Try (constant = ra, variable = rb) first, then the swapped order
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto cv = llvm::dyn_cast<llvm::Constant>(pair.first.value))
		{
			v128 data = get_const_vector(cv, m_pos, 10000);
			data._u32[3] %= SPU_LS_SIZE;

			// Only a 16-byte aligned constant keeps the final address aligned
			// without masking the sum
			if (data._u32[3] % 0x10 == 0)
			{
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				set_vr(op.rt, make_load_ls(addr));
				return;
			}
		}
	}

	// Generic path: wrap and align the 32-bit sum, then widen to 64 bits
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	set_vr(op.rt, make_load_ls(addr));
}
|
||||
|
||||
@ -7928,7 +7965,7 @@ public:
|
||||
{
|
||||
value_t<u64> addr;
|
||||
addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type<u64>());
|
||||
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
|
||||
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
|
||||
make_store_ls(addr, get_vr<u8[16]>(op.rt));
|
||||
}
|
||||
|
||||
@ -7936,7 +7973,7 @@ public:
|
||||
{
|
||||
value_t<u64> addr;
|
||||
addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type<u64>());
|
||||
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
|
||||
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
|
||||
set_vr(op.rt, make_load_ls(addr));
|
||||
}
|
||||
|
||||
@ -7953,13 +7990,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0));
|
||||
value_t<u64> addr = eval(zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm<u64>(op.si10) << 4));
|
||||
make_store_ls(addr, get_vr<u8[16]>(op.rt));
|
||||
}
|
||||
|
||||
// LQD: load 16 bytes from Local Storage into register rt.
//
// The base (element 3 of ra) is masked to 0x3fff0 and zero-extended to 64 bits
// first; the immediate op.si10, shifted left by 4, is then added in 64 bits.
// The sum is not masked again, so the resulting offset may fall outside
// [0, SPU_LS_SIZE).
// NOTE(review): presumably out-of-range offsets are served by the LS mirrors
// mapped on both sides of the primary LS (see the spu_thread constructor), and
// si10 is a signed field - confirm both.
void LQD(spu_opcode_t op)
{
	value_t<u64> addr = eval(zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm<u64>(op.si10) << 4));
	set_vr(op.rt, make_load_ls(addr));
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Utilities/JIT.h"
|
||||
#include "Utilities/asm.h"
|
||||
#include "Utilities/sysinfo.h"
|
||||
#include "Emu/Memory/vm.h"
|
||||
#include "Emu/Memory/vm_ptr.h"
|
||||
#include "Emu/Memory/vm_reservation.h"
|
||||
|
||||
@ -1111,7 +1112,7 @@ void spu_thread::cpu_task()
|
||||
continue;
|
||||
}
|
||||
|
||||
spu_runtime::g_gateway(*this, vm::_ptr<u8>(offset), nullptr);
|
||||
spu_runtime::g_gateway(*this, _ptr<u8>(0), nullptr);
|
||||
}
|
||||
|
||||
// Print some stats
|
||||
@ -1129,7 +1130,7 @@ void spu_thread::cpu_task()
|
||||
break;
|
||||
}
|
||||
|
||||
spu_runtime::g_interpreter(*this, vm::_ptr<u8>(offset), nullptr);
|
||||
spu_runtime::g_interpreter(*this, _ptr<u8>(0), nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1148,8 +1149,21 @@ void spu_thread::cpu_unmem()
|
||||
|
||||
spu_thread::~spu_thread()
|
||||
{
|
||||
// Deallocate Local Storage
|
||||
vm::dealloc_verbose_nothrow(offset);
|
||||
{
|
||||
const auto [_, shm] = vm::get(vm::any, offset)->get(offset);
|
||||
|
||||
for (s32 i = -1; i < 2; i++)
|
||||
{
|
||||
// Unmap LS mirrors
|
||||
shm->unmap_critical(ls + (i * SPU_LS_SIZE));
|
||||
}
|
||||
|
||||
// Deallocate Local Storage
|
||||
vm::dealloc_verbose_nothrow(offset);
|
||||
}
|
||||
|
||||
// Release LS mirrors area
|
||||
utils::memory_release(ls - SPU_LS_SIZE, SPU_LS_SIZE * 3);
|
||||
|
||||
// Deallocate RawSPU ID
|
||||
if (!group && offset >= RAW_SPU_BASE_ADDR)
|
||||
@ -1159,11 +1173,26 @@ spu_thread::~spu_thread()
|
||||
}
|
||||
}
|
||||
|
||||
spu_thread::spu_thread(vm::addr_t ls, lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated)
|
||||
spu_thread::spu_thread(vm::addr_t _ls, lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated)
|
||||
: cpu_thread(idm::last_id())
|
||||
, is_isolated(is_isolated)
|
||||
, index(index)
|
||||
, offset(ls)
|
||||
, offset(_ls)
|
||||
, ls([&]()
|
||||
{
|
||||
const auto [_, shm] = vm::get(vm::any, _ls)->get(_ls);
|
||||
const auto addr = static_cast<u8*>(utils::memory_reserve(SPU_LS_SIZE * 3));
|
||||
|
||||
for (u32 i = 0; i < 3; i++)
|
||||
{
|
||||
// Map LS mirrors
|
||||
const auto ptr = addr + (i * SPU_LS_SIZE);
|
||||
verify(HERE), shm->map_critical(ptr) == ptr;
|
||||
}
|
||||
|
||||
// Use the middle mirror
|
||||
return addr + SPU_LS_SIZE;
|
||||
}())
|
||||
, group(group)
|
||||
, lv2_id(lv2_id)
|
||||
, spu_tname(stx::shared_cptr<std::string>::make(name))
|
||||
@ -1233,7 +1262,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
}
|
||||
|
||||
u32 value;
|
||||
if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + args.size - 1 < 0x40000) // LS access
|
||||
if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + args.size - 1 < SPU_LS_SIZE) // LS access
|
||||
{
|
||||
}
|
||||
else if (args.size == 4 && is_get && thread->read_reg(eal, value))
|
||||
@ -1258,7 +1287,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
{
|
||||
auto& spu = static_cast<spu_thread&>(*group->threads[group->threads_map[index]]);
|
||||
|
||||
if (offset + args.size - 1 < 0x40000) // LS access
|
||||
if (offset + args.size - 1 < SPU_LS_SIZE) // LS access
|
||||
{
|
||||
eal = spu.offset + offset; // redirect access
|
||||
}
|
||||
@ -1282,7 +1311,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
auto [dst, src] = [&]() -> std::pair<u8*, const u8*>
|
||||
{
|
||||
u8* dst = vm::_ptr<u8>(eal);
|
||||
u8* src = vm::_ptr<u8>(offset + lsa);
|
||||
u8* src = _ptr<u8>(lsa);
|
||||
|
||||
if (is_get)
|
||||
{
|
||||
@ -1638,6 +1667,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
||||
transfer.cmd = MFC(args.cmd & ~MFC_LIST_MASK);
|
||||
|
||||
args.lsa &= 0x3fff0;
|
||||
args.eal &= 0x3fff8;
|
||||
|
||||
u32 index = fetch_size;
|
||||
|
||||
@ -1650,7 +1680,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
||||
// Reset to elements array head
|
||||
index = 0;
|
||||
|
||||
const auto src = _ptr<const void>(args.eal & 0x3fff8);
|
||||
const auto src = _ptr<const void>(args.eal);
|
||||
const v128 data0 = v128::loadu(src, 0);
|
||||
const v128 data1 = v128::loadu(src, 1);
|
||||
const v128 data2 = v128::loadu(src, 2);
|
||||
@ -2947,7 +2977,7 @@ bool spu_thread::stop_and_signal(u32 code)
|
||||
spu_log.warning("STOP 0x0");
|
||||
|
||||
// HACK: find an ILA instruction
|
||||
for (u32 addr = pc; addr < 0x40000; addr += 4)
|
||||
for (u32 addr = pc; addr < SPU_LS_SIZE; addr += 4)
|
||||
{
|
||||
const u32 instr = _ref<u32>(addr);
|
||||
|
||||
|
@ -118,6 +118,11 @@ enum : u32
|
||||
SPU_STATUS_IS_ISOLATED = 0x80,
|
||||
};
|
||||
|
||||
// Size in bytes of one SPU Local Storage (LS) region: 0x40000 = 256 KiB.
// NOTE(review): the underlying type is signed, presumably so the constant can be
// multiplied by negative mirror indices (as done in ~spu_thread) without an
// unsigned wrap - confirm.
enum : s32
{
	SPU_LS_SIZE = 256 * 1024, // 0x40000
};
|
||||
|
||||
enum : u32
|
||||
{
|
||||
SYS_SPU_THREAD_BASE_LOW = 0xf0000000,
|
||||
@ -636,6 +641,7 @@ public:
|
||||
|
||||
const u32 index; // SPU index
|
||||
const u32 offset; // SPU LS offset
|
||||
const std::add_pointer_t<u8> ls; // SPU LS pointer
|
||||
private:
|
||||
lv2_spu_group* const group; // SPU Thread Group (only safe to access in the spu thread itself)
|
||||
public:
|
||||
@ -682,7 +688,7 @@ public:
|
||||
template<typename T>
|
||||
inline to_be_t<T>* _ptr(u32 lsa)
|
||||
{
|
||||
return static_cast<to_be_t<T>*>(vm::base(offset + lsa));
|
||||
return reinterpret_cast<to_be_t<T>*>(ls + lsa);
|
||||
}
|
||||
|
||||
// Convert specified SPU LS address to a reference of specified (possibly converted to BE) type
|
||||
|
@ -397,7 +397,7 @@ error_code sys_spu_thread_initialize(ppu_thread& ppu, vm::ptr<u32> thread, u32 g
|
||||
sys_spu.warning("Unimplemented SPU Thread options (0x%x)", option);
|
||||
}
|
||||
|
||||
const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(0x80000, vm::main))};
|
||||
const vm::addr_t ls_addr{verify("SPU LS" HERE, vm::alloc(SPU_LS_SIZE, vm::main))};
|
||||
|
||||
const u32 inited = group->init;
|
||||
|
||||
@ -579,7 +579,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr<u32> id, u32 num
|
||||
if (type & SYS_SPU_THREAD_GROUP_TYPE_COOPERATE_WITH_SYSTEM)
|
||||
{
|
||||
// Constant size, unknown what it means but it's definitely not for each spu thread alone
|
||||
mem_size = 0x40000;
|
||||
mem_size = SPU_LS_SIZE;
|
||||
use_scheduler = false;
|
||||
}
|
||||
else if (type & SYS_SPU_THREAD_GROUP_TYPE_NON_CONTEXT)
|
||||
@ -591,7 +591,7 @@ error_code sys_spu_thread_group_create(ppu_thread& ppu, vm::ptr<u32> id, u32 num
|
||||
else
|
||||
{
|
||||
// 256kb for each spu thread, probably for saving and restoring SPU LS (used by scheduler?)
|
||||
mem_size = 0x40000 * num;
|
||||
mem_size = SPU_LS_SIZE * num;
|
||||
}
|
||||
|
||||
if (num < min_threads || num > max_threads ||
|
||||
@ -1225,7 +1225,7 @@ error_code sys_spu_thread_write_ls(ppu_thread& ppu, u32 id, u32 lsa, u64 value,
|
||||
|
||||
sys_spu.trace("sys_spu_thread_write_ls(id=0x%x, lsa=0x%05x, value=0x%llx, type=%d)", id, lsa, value, type);
|
||||
|
||||
if (lsa >= 0x40000 || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment
|
||||
if (lsa >= SPU_LS_SIZE || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment
|
||||
{
|
||||
return CELL_EINVAL;
|
||||
}
|
||||
@ -1268,7 +1268,7 @@ error_code sys_spu_thread_read_ls(ppu_thread& ppu, u32 id, u32 lsa, vm::ptr<u64>
|
||||
|
||||
sys_spu.trace("sys_spu_thread_read_ls(id=0x%x, lsa=0x%05x, value=*0x%x, type=%d)", id, lsa, value, type);
|
||||
|
||||
if (lsa >= 0x40000 || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment
|
||||
if (lsa >= SPU_LS_SIZE || type > 8 || !type || (type | lsa) & (type - 1)) // check range and alignment
|
||||
{
|
||||
return CELL_EINVAL;
|
||||
}
|
||||
@ -1831,7 +1831,7 @@ error_code sys_raw_spu_create(ppu_thread& ppu, vm::ptr<u32> id, vm::ptr<void> at
|
||||
index = 0;
|
||||
}
|
||||
|
||||
const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000, vm::spu))};
|
||||
const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, SPU_LS_SIZE, vm::spu))};
|
||||
|
||||
const u32 tid = idm::make<named_thread<spu_thread>>(fmt::format("RawSPU[0x%x] ", index), ls_addr, nullptr, index, "", index);
|
||||
|
||||
@ -1879,7 +1879,7 @@ error_code sys_isolated_spu_create(ppu_thread& ppu, vm::ptr<u32> id, vm::ptr<voi
|
||||
index = 0;
|
||||
}
|
||||
|
||||
const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, 0x40000, vm::spu))};
|
||||
const vm::addr_t ls_addr{verify(HERE, vm::falloc(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * index, SPU_LS_SIZE, vm::spu))};
|
||||
|
||||
const auto thread = idm::make_ptr<named_thread<spu_thread>>(fmt::format("IsoSPU[0x%x] ", index), ls_addr, nullptr, index, "", index, true);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user