mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-16 23:17:29 +00:00
SPU: load previous data on PUTLLC failure
The SPU will most likely execute GETLLAR right afterwards to load the data again. Only implemented for TSX at the moment.
This commit is contained in:
parent
d5667a859a
commit
425fce5070
@ -5985,6 +5985,8 @@ public:
|
||||
call("spu_memcpy", +spu_memcpy, dst, src, zext<u32>(size).eval(m_ir));
|
||||
}
|
||||
|
||||
// Clear last_faddr: give up on the PUTLLC-failure reload once this transfer has happened
|
||||
m_ir->CreateStore(m_ir->getInt32(0), spu_ptr<u32>(&spu_thread::last_faddr));
|
||||
m_ir->CreateBr(next);
|
||||
break;
|
||||
}
|
||||
@ -7128,7 +7130,7 @@ public:
|
||||
set_vr(op.rt4, select(noncast<s32[4]>(c) != 0, get_vr<u32[4]>(op.rb), get_vr<u32[4]>(op.ra)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
bool sel_16 = true;
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
{
|
||||
@ -7144,7 +7146,7 @@ public:
|
||||
set_vr(op.rt4, select(bitcast<s16[8]>(c) != 0, get_vr<u16[8]>(op.rb), get_vr<u16[8]>(op.ra)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
bool sel_8 = true;
|
||||
for (u32 i = 0; i < 16; i++)
|
||||
{
|
||||
|
@ -317,6 +317,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
||||
Label _ret = c.newLabel();
|
||||
Label skip = c.newLabel();
|
||||
Label next = c.newLabel();
|
||||
Label load = c.newLabel();
|
||||
|
||||
//if (utils::has_avx() && !s_tsx_avx)
|
||||
//{
|
||||
@ -473,7 +474,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
||||
// XABORT is expensive so finish with xend instead
|
||||
c.bind(fail);
|
||||
|
||||
// Load old data (unused)
|
||||
// Load old data to store back in rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
@ -494,9 +495,8 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
||||
}
|
||||
|
||||
c.xend();
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||
c.jmp(_ret);
|
||||
c.jmp(load);
|
||||
|
||||
c.bind(skip);
|
||||
c.xend();
|
||||
@ -609,7 +609,7 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
||||
// XABORT is expensive so try to finish with xend instead
|
||||
c.bind(fail3);
|
||||
|
||||
// Load old data (unused)
|
||||
// Load previous data to store back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
@ -639,6 +639,28 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, void*
|
||||
|
||||
c.bind(fail2);
|
||||
c.lock().sub(x86::qword_ptr(x86::rbx), 1);
|
||||
c.bind(load);
|
||||
|
||||
// Store previous data back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movaps(x86::oword_ptr(args[2], 0), x86::xmm0);
|
||||
c.movaps(x86::oword_ptr(args[2], 16), x86::xmm1);
|
||||
c.movaps(x86::oword_ptr(args[2], 32), x86::xmm2);
|
||||
c.movaps(x86::oword_ptr(args[2], 48), x86::xmm3);
|
||||
c.movaps(x86::oword_ptr(args[2], 64), x86::xmm4);
|
||||
c.movaps(x86::oword_ptr(args[2], 80), x86::xmm5);
|
||||
c.movaps(x86::oword_ptr(args[2], 96), x86::xmm6);
|
||||
c.movaps(x86::oword_ptr(args[2], 112), x86::xmm7);
|
||||
}
|
||||
|
||||
c.xor_(x86::eax, x86::eax);
|
||||
//c.jmp(_ret);
|
||||
|
||||
@ -1405,6 +1427,9 @@ spu_thread::~spu_thread()
|
||||
|
||||
// Free range lock
|
||||
vm::free_range_lock(range_lock);
|
||||
|
||||
perf_log.notice("Perf stats for transactions: success %u, failure %u", stx, ftx);
|
||||
perf_log.notice("Perf stats for PUTLLC reload: successs %u, failure %u", last_succ, last_fail);
|
||||
}
|
||||
|
||||
spu_thread::spu_thread(vm::addr_t _ls, lv2_spu_group* group, u32 index, std::string_view name, u32 lv2_id, bool is_isolated, u32 option)
|
||||
@ -1610,6 +1635,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||
u8* dst = nullptr;
|
||||
const u8* src = nullptr;
|
||||
|
||||
// Cleanup: if PUT or GET happens after PUTLLC failure, it's too complicated and it's easier to just give up
|
||||
last_faddr = 0;
|
||||
|
||||
std::tie(dst, src) = [&]() -> std::pair<u8*, const u8*>
|
||||
{
|
||||
u8* dst = vm::_ptr<u8>(eal);
|
||||
@ -2319,10 +2347,10 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
{
|
||||
const bool ok = cpu_thread::suspend_all<+1>(this, [&]()
|
||||
{
|
||||
auto& data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
||||
|
||||
if ((res & -128) == rtime)
|
||||
{
|
||||
auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
if (cmp_rdata(rdata, data))
|
||||
{
|
||||
mov_rdata(data, to_write);
|
||||
@ -2331,13 +2359,31 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
}
|
||||
}
|
||||
|
||||
// Save previous data
|
||||
mov_rdata(rdata, data);
|
||||
res -= 1;
|
||||
return false;
|
||||
});
|
||||
|
||||
return ok;
|
||||
if (ok)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
[[fallthrough]];
|
||||
}
|
||||
case 0:
|
||||
{
|
||||
if (addr == last_faddr)
|
||||
{
|
||||
last_fail++;
|
||||
}
|
||||
|
||||
last_faddr = addr;
|
||||
last_ftime = res.load() & -128;
|
||||
last_ftsc = __rdtsc();
|
||||
return false;
|
||||
}
|
||||
case 0: return false;
|
||||
default:
|
||||
{
|
||||
if (count > 60 && g_cfg.core.perf_report) [[unlikely]]
|
||||
@ -2345,9 +2391,17 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||
perf_log.warning("PUTLLC: took too long: %u", count);
|
||||
}
|
||||
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (addr == last_faddr)
|
||||
{
|
||||
last_succ++;
|
||||
}
|
||||
|
||||
last_faddr = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
auto [_oldd, _ok] = res.fetch_op([&](u64& r)
|
||||
@ -2639,16 +2693,42 @@ bool spu_thread::process_mfc_cmd()
|
||||
return true;
|
||||
case MFC_GETLLAR_CMD:
|
||||
{
|
||||
perf_meter<"GETLLAR"_u64> perf0;
|
||||
|
||||
const u32 addr = ch_mfc_cmd.eal & -128;
|
||||
const auto& data = vm::_ref<spu_rdata_t>(addr);
|
||||
|
||||
if (addr == last_faddr)
|
||||
{
|
||||
// TODO: make this configurable and possible to disable
|
||||
spu_log.trace(u8"GETLLAR after fail: addr=0x%x, time=%u c", last_faddr, (perf0.get() - last_ftsc));
|
||||
}
|
||||
|
||||
if (addr == last_faddr && perf0.get() - last_ftsc < 1000 && (vm::reservation_acquire(addr, 128) & -128) == last_ftime)
|
||||
{
|
||||
rtime = last_ftime;
|
||||
raddr = last_faddr;
|
||||
mov_rdata(_ref<spu_rdata_t>(ch_mfc_cmd.lsa & 0x3ff80), rdata);
|
||||
|
||||
ch_atomic_stat.set_value(MFC_GETLLAR_SUCCESS);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Silent failure
|
||||
last_faddr = 0;
|
||||
}
|
||||
|
||||
if (addr == raddr && !g_use_rtm && g_cfg.core.spu_getllar_polling_detection && rtime == vm::reservation_acquire(addr, 128) && cmp_rdata(rdata, data))
|
||||
{
|
||||
// Spinning, might as well yield cpu resources
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
perf_meter<"GETLLAR"_u64> perf0;
|
||||
// Reset perf
|
||||
perf_meter<'x'> dummy;
|
||||
perf0 = dummy;
|
||||
dummy.reset();
|
||||
}
|
||||
|
||||
alignas(64) spu_rdata_t temp;
|
||||
u64 ntime;
|
||||
@ -2768,14 +2848,22 @@ bool spu_thread::process_mfc_cmd()
|
||||
|
||||
case MFC_PUTLLC_CMD:
|
||||
{
|
||||
ch_atomic_stat.set_value(do_putllc(ch_mfc_cmd) ? MFC_PUTLLC_SUCCESS : MFC_PUTLLC_FAILURE);
|
||||
return true;
|
||||
if (do_putllc(ch_mfc_cmd))
|
||||
{
|
||||
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
ch_atomic_stat.set_value(MFC_PUTLLC_FAILURE);
|
||||
}
|
||||
|
||||
return !test_stopped();
|
||||
}
|
||||
case MFC_PUTLLUC_CMD:
|
||||
{
|
||||
do_putlluc(ch_mfc_cmd);
|
||||
ch_atomic_stat.set_value(MFC_PUTLLUC_SUCCESS);
|
||||
return true;
|
||||
return !test_stopped();
|
||||
}
|
||||
case MFC_PUTQLLUC_CMD:
|
||||
{
|
||||
|
@ -756,6 +756,12 @@ public:
|
||||
u64 ftx = 0; // Failed transactions
|
||||
u64 stx = 0; // Succeeded transactions (pure counters)
|
||||
|
||||
u64 last_ftsc = 0;
|
||||
u64 last_ftime = 0;
|
||||
u32 last_faddr = 0;
|
||||
u64 last_fail = 0;
|
||||
u64 last_succ = 0;
|
||||
|
||||
std::array<v128, 0x4000> stack_mirror; // Return address information
|
||||
|
||||
const char* current_func{}; // Current STOP or RDCH blocking function
|
||||
|
Loading…
Reference in New Issue
Block a user