From 33a1c743a43997199c0ea8bd89b10100e20e6d1e Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 18 May 2018 18:51:48 +0300 Subject: [PATCH] Transactions: move loops inside Rewrite loops in assembler (minor optimization) --- Utilities/JIT.cpp | 3 +- Utilities/JIT.h | 4 +- rpcs3/Emu/Cell/PPUThread.cpp | 93 ++++++++++++++++++------------------ rpcs3/Emu/Cell/SPUThread.cpp | 87 ++++++++++++--------------------- 4 files changed, 81 insertions(+), 106 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index da5716bd28..eec7e0a812 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -7,7 +7,7 @@ asmjit::JitRuntime& asmjit::get_global_runtime() return g_rt; } -void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback) +asmjit::Label asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback) { Label fall = c.newLabel(); Label begin = c.newLabel(); @@ -18,6 +18,7 @@ void asmjit::build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fall c.align(kAlignCode, 16); c.bind(begin); c.xbegin(fall); + return begin; } void asmjit::build_transaction_abort(asmjit::X86Assembler& c, unsigned char code) diff --git a/Utilities/JIT.h b/Utilities/JIT.h index cc5345e3df..40d9af4ada 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -12,8 +12,8 @@ namespace asmjit // Should only be used to build global functions JitRuntime& get_global_runtime(); - // Emit xbegin and adjacent loop - void build_transaction_enter(X86Assembler& c, Label fallback); + // Emit xbegin and adjacent loop, return label at xbegin + Label build_transaction_enter(X86Assembler& c, Label fallback); // Emit xabort void build_transaction_abort(X86Assembler& c, unsigned char code); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 9ef9545194..b7f337c5e9 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -944,8 +944,23 @@ static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) ppu.raddr = addr; + while (g_use_rtm) + { + ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); + ppu.rdata = data; + + if (LIKELY(vm::reservation_acquire(addr, sizeof(T)) == ppu.rtime)) + { + return static_cast(ppu.rdata); + } + else + { + _mm_pause(); + } + } + // Do several attemps - for (uint i = 0; g_use_rtm || i < 5; i++) + for (uint i = 0; i < 5; i++) { ppu.rtime = vm::reservation_acquire(addr, sizeof(T)); _mm_lfence(); @@ -981,7 +996,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr) return ppu_load_acquire_reservation(ppu, addr); } -const auto ppu_stwcx_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto ppu_stwcx_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -998,9 +1013,10 @@ const auto ppu_stwcx_tx = build_function_asm 0) - { - vm::reservation_notifier(addr, sizeof(u32)).notify_all(); - ppu.raddr = 0; - return true; - } - - if (r < 0) - { - // Reservation lost - ppu.raddr = 0; - return false; - } + vm::reservation_notifier(addr, sizeof(u32)).notify_all(); + ppu.raddr = 0; + return true; } - // Give up + // Reservation lost ppu.raddr = 0; return false; } @@ -1078,7 +1085,7 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value) return result; } -const auto ppu_stdcx_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto ppu_stdcx_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -1095,9 +1102,10 @@ const auto ppu_stdcx_tx = build_function_asm 0) - { - vm::reservation_notifier(addr, sizeof(u64)).notify_all(); - ppu.raddr = 0; - return true; - } - - if (r < 0) - { - // Reservation lost - ppu.raddr = 0; - return false; - } + vm::reservation_notifier(addr, sizeof(u64)).notify_all(); + ppu.raddr = 0; + return true; } - // Give up + // Reservation lost ppu.raddr = 0; return false; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 5d56997b5b..362175c0a1 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -213,7 +213,7 @@ namespace spu } } -const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -228,6 +228,7 @@ const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) @@ -308,7 +317,7 @@ const auto spu_getll_tx = build_function_asm([]( c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0])); // Begin transaction - build_transaction_enter(c, fall); + Label begin = build_transaction_enter(c, fall); c.mov(x86::rax, x86::qword_ptr(x86::r10)); c.vmovaps(x86::ymm0, x86::yword_ptr(x86::r11, 0)); c.vmovaps(x86::ymm1, x86::yword_ptr(x86::r11, 32)); @@ -324,13 +333,13 @@ const auto spu_getll_tx = build_function_asm([]( // Touch memory after transaction failure c.bind(fall); + c.pause(); c.mov(x86::rax, x86::qword_ptr(x86::r11)); c.mov(x86::rax, x86::qword_ptr(x86::r10)); - c.mov(x86::eax, 1); - c.ret(); + c.jmp(begin); }); -const auto spu_putlluc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) +const auto spu_putlluc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args) { using namespace asmjit; @@ -352,7 +361,7 @@ const auto spu_putlluc_tx = build_function_asm(addr); const auto to_write = _ref(args.lsa & 0x3ffff); - vm::reservation_acquire(addr, 128); - // Store unconditionally - while (g_use_rtm) + if (g_use_rtm) { - if (spu_putlluc_tx(addr, to_write.data())) - { - vm::reservation_notifier(addr, 128).notify_all(); - tx_success++; - return; - } - - busy_wait(300); - tx_failure++; + spu_putlluc_tx(addr, to_write.data()); + vm::reservation_notifier(addr, 128).notify_all(); + return; } vm::writer_lock lock(0); @@ -1248,19 +1248,9 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) } } - while (g_use_rtm) + if (g_use_rtm) { rtime = spu_getll_tx(raddr, rdata.data()); - - if (rtime & 1) - { - tx_failure++; - busy_wait(300); - continue; - } - - tx_success++; - break; } // Do several attemps @@ -1312,28 +1302,13 @@ bool SPUThread::process_mfc_cmd(spu_mfc_cmd args) { if (g_use_rtm) { - // Do several attempts (TODO) - for (u32 i = 0; i < 3; i++) + if (spu_putllc_tx(raddr, rtime, rdata.data(), to_write.data())) { - const int r = spu_putllc_tx(raddr, rtime, rdata.data(), to_write.data()); - - if (r > 0) - { - vm::reservation_notifier(raddr, 128).notify_all(); - result = true; - tx_success++; - break; - } - - if (r < 0) - { - // Reservation lost - break; - } - - // Don't fallback to heavyweight lock, just give up - tx_failure++; + vm::reservation_notifier(raddr, 128).notify_all(); + result = true; } + + // Don't fallback to heavyweight lock, just give up } else if (rdata == data) {