From cb2748ae089e877fd0867ef5ab95ae7b6f2dca6c Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 28 Dec 2021 22:25:36 +0300 Subject: [PATCH] Update ASMJIT (new upstream API) --- .gitmodules | 2 +- 3rdparty/asmjit/asmjit | 2 +- 3rdparty/asmjit/asmjit.vcxproj | 184 ++++++++++++++++++------- 3rdparty/asmjit/asmjit.vcxproj.filters | 184 ++++++++++++++++++------- Utilities/JIT.cpp | 154 +++++++-------------- Utilities/JIT.h | 96 ++++++------- Utilities/Thread.cpp | 4 +- rpcs3/Emu/Cell/PPUFunction.cpp | 6 +- rpcs3/Emu/Cell/PPUThread.cpp | 40 +++--- rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp | 176 ++++++++++++----------- rpcs3/Emu/Cell/SPUASMJITRecompiler.h | 36 ++--- rpcs3/Emu/Cell/SPUInterpreter.cpp | 8 +- rpcs3/Emu/Cell/SPURecompiler.cpp | 6 +- rpcs3/Emu/Cell/SPUThread.cpp | 86 ++++++------ rpcs3/Emu/RSX/Common/BufferUtils.cpp | 24 ++-- 15 files changed, 550 insertions(+), 458 deletions(-) diff --git a/.gitmodules b/.gitmodules index 2379e83933..fac2313a0f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,7 +5,7 @@ [submodule "asmjit"] path = 3rdparty/asmjit/asmjit url = ../../asmjit/asmjit.git - branch = oldstable + branch = aarch64 ignore = dirty [submodule "llvm"] path = llvm diff --git a/3rdparty/asmjit/asmjit b/3rdparty/asmjit/asmjit index 723f58581a..eae7197fce 160000 --- a/3rdparty/asmjit/asmjit +++ b/3rdparty/asmjit/asmjit @@ -1 +1 @@ -Subproject commit 723f58581afc0f4cb16ba13396ff77e425896847 +Subproject commit eae7197fce03fd52a6e71ca89207a88ce270fb1a diff --git a/3rdparty/asmjit/asmjit.vcxproj b/3rdparty/asmjit/asmjit.vcxproj index a786d83b84..6b1af24e38 100644 --- a/3rdparty/asmjit/asmjit.vcxproj +++ b/3rdparty/asmjit/asmjit.vcxproj @@ -11,66 +11,146 @@ - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + + + + + - + + + + + + + + + + + + + + + + + + {AC40FF01-426E-4838-A317-66354CEFAE88} diff --git a/3rdparty/asmjit/asmjit.vcxproj.filters b/3rdparty/asmjit/asmjit.vcxproj.filters index 0505172685..ac62c220ba 100644 --- a/3rdparty/asmjit/asmjit.vcxproj.filters +++ b/3rdparty/asmjit/asmjit.vcxproj.filters @@ -1,65 +1,145 @@  - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + + + + + - + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 7029850d25..ced02827e1 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -113,8 +113,32 @@ static u8* add_jit_memory(usz size, uint align) return pointer + pos; } +const asmjit::Environment& jit_runtime_base::environment() const noexcept +{ + static const asmjit::Environment g_env = asmjit::Environment::host(); + + return g_env; +} + +void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept +{ + ensure(!code->flatten()); + ensure(!code->resolveUnresolvedLinks()); + usz codeSize = ensure(code->codeSize()); + auto p = ensure(this->_alloc(codeSize, 64)); + ensure(!code->relocateToBase(uptr(p))); + + asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize); + + for (asmjit::Section* section : code->_sections) + { + std::memcpy(p + section->offset(), section->data(), section->bufferSize()); + } + + return p; +} + jit_runtime::jit_runtime() - : HostRuntime() { } @@ -122,38 +146,9 @@ jit_runtime::~jit_runtime() { } -asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept +uchar* jit_runtime::_alloc(usz size, usz align) noexcept { - usz codeSize = code->getCodeSize(); - if (!codeSize) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorNoCodeGenerated; - } - - void* p = jit_runtime::alloc(codeSize, 16); - if (!p) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorNoVirtualMemory; - } - - usz relocSize = code->relocate(p); - if (!relocSize) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorInvalidState; - } - - flush(p, relocSize); - *dst = p; - - return asmjit::kErrorOk; -} - -asmjit::Error jit_runtime::_release(void*) noexcept -{ - return asmjit::kErrorOk; + return jit_runtime::alloc(size, align, true); } u8* jit_runtime::alloc(usz size, uint align, bool exec) noexcept @@ -200,12 +195,12 @@ void jit_runtime::finalize() noexcept std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size()); } -asmjit::Runtime& asmjit::get_global_runtime() +jit_runtime_base& asmjit::get_global_runtime() { // 16 MiB for internal needs static constexpr u64 size = 1024 * 1024 * 16; - struct custom_runtime final : asmjit::HostRuntime + struct custom_runtime final : jit_runtime_base { custom_runtime() noexcept { @@ -214,7 +209,7 @@ asmjit::Runtime& asmjit::get_global_runtime() { if (auto ptr = utils::memory_reserve(size, reinterpret_cast(addr))) { - m_pos.raw() = static_cast(ptr); + m_pos.raw() = static_cast(ptr); break; } } @@ -226,49 +221,26 @@ asmjit::Runtime& asmjit::get_global_runtime() utils::memory_commit(m_pos, size, utils::protection::wx); } - custom_runtime(const custom_runtime&) = delete; - - custom_runtime& operator=(const custom_runtime&) = delete; - - asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override + uchar* _alloc(usz size, usz align) noexcept override { - usz codeSize = code->getCodeSize(); - if (!codeSize) [[unlikely]] + return m_pos.atomic_op([&](uchar*& pos) -> uchar* { - *dst = nullptr; - return asmjit::kErrorNoCodeGenerated; - } + const auto r = reinterpret_cast(utils::align(uptr(pos), align)); - void* p = m_pos.fetch_add(utils::align(codeSize, 64)); - if (!p || m_pos > m_max) [[unlikely]] - { - *dst = nullptr; - jit_log.fatal("Out of memory (static asmjit)"); - return asmjit::kErrorNoVirtualMemory; - } + if (r >= pos && r + size > pos && r + size <= m_max) + { + pos = r + size; + return r; + } - usz relocSize = code->relocate(p); - if (!relocSize) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorInvalidState; - } - - flush(p, relocSize); - *dst = p; - - return asmjit::kErrorOk; - } - - asmjit::Error _release(void*) noexcept override - { - return asmjit::kErrorOk; + return nullptr; + }); } private: - atomic_t m_pos{}; + atomic_t m_pos{}; - std::byte* m_max{}; + uchar* m_max{}; }; // Magic static @@ -276,37 +248,17 @@ asmjit::Runtime& asmjit::get_global_runtime() return g_rt; } -asmjit::Error asmjit::inline_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept +asmjit::inline_runtime::inline_runtime(uchar* data, usz size) + : m_data(data) + , m_size(size) { - usz codeSize = code->getCodeSize(); - if (!codeSize) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorNoCodeGenerated; - } - - if (utils::align(codeSize, 4096) > m_size) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorNoVirtualMemory; - } - - usz relocSize = code->relocate(m_data); - if (!relocSize) [[unlikely]] - { - *dst = nullptr; - return asmjit::kErrorInvalidState; - } - - flush(m_data, relocSize); - *dst = m_data; - - return asmjit::kErrorOk; } -asmjit::Error asmjit::inline_runtime::_release(void*) noexcept +uchar* asmjit::inline_runtime::_alloc(usz size, usz align) noexcept { - return asmjit::kErrorOk; + ensure(align <= 4096); + + return size <= m_size ? m_data : nullptr; } asmjit::inline_runtime::~inline_runtime() @@ -397,19 +349,19 @@ static u64 make_null_function(const std::string& name) using namespace asmjit; // Build a "null" function that contains its name - const auto func = build_function_asm("NULL", [&](X86Assembler& c, auto& args) + const auto func = build_function_asm("NULL", [&](x86::Assembler& c, auto& args) { Label data = c.newLabel(); c.lea(args[0], x86::qword_ptr(data, 0)); - c.jmp(imm_ptr(&null)); - c.align(kAlignCode, 16); + c.jmp(Imm(&null)); + c.align(AlignMode::kCode, 16); c.bind(data); // Copy function name bytes for (char ch : name) c.db(ch); c.db(0); - c.align(kAlignData, 16); + c.align(AlignMode::kData, 16); }); func_ptr = reinterpret_cast(func); diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 1b111eba0e..68e0c8bd30 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -4,7 +4,9 @@ // Include asmjit with warnings ignored #define ASMJIT_EMBED -#define ASMJIT_DEBUG +#define ASMJIT_STATIC +#define ASMJIT_BUILD_DEBUG +#undef Bool #ifdef _MSC_VER #pragma warning(push, 0) @@ -49,17 +51,27 @@ enum class jit_class spu_data, }; +struct jit_runtime_base +{ + jit_runtime_base() noexcept = default; + virtual ~jit_runtime_base() = default; + + jit_runtime_base(const jit_runtime_base&) = delete; + jit_runtime_base& operator=(const jit_runtime_base&) = delete; + + const asmjit::Environment& environment() const noexcept; + void* _add(asmjit::CodeHolder* code) noexcept; + virtual uchar* _alloc(usz size, usz align) noexcept = 0; +}; + // ASMJIT runtime for emitting code in a single 2G region -struct jit_runtime final : asmjit::HostRuntime +struct jit_runtime final : jit_runtime_base { jit_runtime(); ~jit_runtime() override; // Allocate executable memory - asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override; - - // Do nothing (deallocation is delayed) - asmjit::Error _release(void* p) noexcept override; + uchar* _alloc(usz size, usz align) noexcept override; // Allocate memory static u8* alloc(usz size, uint align, bool exec = true) noexcept; @@ -74,35 +86,25 @@ struct jit_runtime final : asmjit::HostRuntime namespace asmjit { // Should only be used to build global functions - asmjit::Runtime& get_global_runtime(); + jit_runtime_base& get_global_runtime(); // Don't use directly - class inline_runtime : public HostRuntime + class inline_runtime : public jit_runtime_base { uchar* m_data; usz m_size; public: - inline_runtime(const inline_runtime&) = delete; - - inline_runtime& operator=(const inline_runtime&) = delete; - - inline_runtime(uchar* data, usz size) - : m_data(data) - , m_size(size) - { - } - - asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override; - - asmjit::Error _release(void*) noexcept override; + inline_runtime(uchar* data, usz size); ~inline_runtime(); + + uchar* _alloc(usz size, usz align) noexcept override; }; // Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please) template - [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func) + [[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::x86::Assembler& c, asmjit::Label fallback, F func) { Label fall = c.newLabel(); Label begin = c.newLabel(); @@ -117,7 +119,7 @@ namespace asmjit func(); // Other bad statuses are ignored regardless of repeat flag (TODO) - c.align(kAlignCode, 16); + c.align(AlignMode::kCode, 16); c.bind(begin); return fall; @@ -125,7 +127,7 @@ namespace asmjit } // Helper to spill RDX (EDX) register for RDTSC - inline void build_swap_rdx_with(asmjit::X86Assembler& c, std::array& args, const asmjit::X86Gp& with) + inline void build_swap_rdx_with(asmjit::x86::Assembler& c, std::array& args, const asmjit::x86::Gp& with) { #ifdef _WIN32 c.xchg(args[1], with); @@ -137,7 +139,7 @@ namespace asmjit } // Get full RDTSC value into chosen register (clobbers rax/rdx or saves only rax with other target) - inline void build_get_tsc(asmjit::X86Assembler& c, const asmjit::X86Gp& to = asmjit::x86::rax) + inline void build_get_tsc(asmjit::x86::Assembler& c, const asmjit::x86::Gp& to = asmjit::x86::rax) { if (&to != &x86::rax && &to != &x86::rdx) { @@ -164,6 +166,8 @@ namespace asmjit c.or_(to.r64(), x86::rdx); } } + + using imm_ptr = Imm; } // Build runtime function with asmjit::X86Assembler @@ -175,10 +179,9 @@ inline FT build_function_asm(std::string_view name, F&& builder) auto& rt = get_global_runtime(); CodeHolder code; - code.init(rt.getCodeInfo()); - code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign; + code.init(rt.environment()); - std::array args; + std::array args; #ifdef _WIN32 args[0] = x86::rcx; args[1] = x86::rdx; @@ -191,19 +194,12 @@ inline FT build_function_asm(std::string_view name, F&& builder) args[3] = x86::rcx; #endif - X86Assembler compiler(&code); + x86::Assembler compiler(&code); + compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign); builder(std::ref(compiler), args); - ensure(compiler.getLastError() == 0); - - FT result; - - if (rt.add(&result, &code)) - { - return nullptr; - } - - jit_announce(result, code.getCodeSize(), name); - return result; + const auto result = rt._add(&code); + jit_announce(result, code.codeSize(), name); + return reinterpret_cast(uptr(result)); } #ifdef __APPLE__ @@ -253,10 +249,9 @@ public: inline_runtime rt(m_data, Size); CodeHolder code; - code.init(rt.getCodeInfo()); - code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign; + code.init(rt.environment()); - std::array args; + std::array args; #ifdef _WIN32 args[0] = x86::rcx; args[1] = x86::rdx; @@ -269,19 +264,10 @@ public: args[3] = x86::rcx; #endif - X86Assembler compiler(&code); + x86::Assembler compiler(&code); + compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign); builder(std::ref(compiler), args); - - FT result; - - if (compiler.getLastError() || rt.add(&result, &code)) - { - ensure(false); - } - else - { - jit_announce(result, code.getCodeSize(), name); - } + jit_announce(rt._add(&code), code.codeSize(), name); } operator FT() const noexcept diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 077464845c..eb05a235b5 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -2190,7 +2190,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base)) { - return build_function_asm("thread_base_trampoline", [&](asmjit::X86Assembler& c, auto& args) + return build_function_asm("thread_base_trampoline", [&](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -2203,7 +2203,7 @@ thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* // Call finalize, return if zero c.mov(args[0], x86::rax); - c.call(imm_ptr(finalize)); + c.call(imm_ptr(static_cast(&finalize))); c.test(x86::rax, x86::rax); c.jz(_ret); diff --git a/rpcs3/Emu/Cell/PPUFunction.cpp b/rpcs3/Emu/Cell/PPUFunction.cpp index 8c12152836..853adcd0f0 100644 --- a/rpcs3/Emu/Cell/PPUFunction.cpp +++ b/rpcs3/Emu/Cell/PPUFunction.cpp @@ -1910,14 +1910,14 @@ std::vector& ppu_function_manager::access(bool ghc) static std::vector list_ghc { - build_function_asm("ppu_unregistered", [](asmjit::X86Assembler& c, auto& args) + build_function_asm("ppu_unregistered", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; c.mov(args[0], x86::rbp); c.jmp(imm_ptr(list[0])); }), - build_function_asm("ppu_return", [](asmjit::X86Assembler& c, auto& args) + build_function_asm("ppu_return", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -1937,7 +1937,7 @@ u32 ppu_function_manager::add_function(ppu_function_t function) list.push_back(function); // Generate trampoline - list2.push_back(build_function_asm("ppu_trampolinea", [&](asmjit::X86Assembler& c, auto& args) + list2.push_back(build_function_asm("ppu_trampolinea", [&](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 10fefd6a2a..ed7a265155 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -147,7 +147,7 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op); extern void do_cell_atomic_128_store(u32 addr, const void* to_write); -const auto ppu_gateway = built_function("ppu_gateway", [](asmjit::X86Assembler& c, auto& args) +const auto ppu_gateway = built_function("ppu_gateway", [](asmjit::x86::Assembler& c, auto& args) { // Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape using namespace asmjit; @@ -248,7 +248,7 @@ const auto ppu_gateway = built_function("ppu_gateway", []( c.ret(); }); -const extern auto ppu_escape = build_function_asm("ppu_escape", [](asmjit::X86Assembler& c, auto& args) +const extern auto ppu_escape = build_function_asm("ppu_escape", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -262,7 +262,7 @@ const extern auto ppu_escape = build_function_asm("ppu_esc void ppu_recompiler_fallback(ppu_thread& ppu); -const auto ppu_recompiler_fallback_ghc = build_function_asm("ppu_trampolineb", [](asmjit::X86Assembler& c, auto& args) +const auto ppu_recompiler_fallback_ghc = build_function_asm("ppu_trampolineb", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -1817,7 +1817,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr) return ppu_load_acquire_reservation(ppu, addr); } -const auto ppu_stcx_accurate_tx = built_function("ppu_stcx_accurate_tx", [](asmjit::X86Assembler& c, auto& args) +const auto ppu_stcx_accurate_tx = built_function("ppu_stcx_accurate_tx", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -1859,10 +1859,10 @@ const auto ppu_stcx_accurate_tx = built_functionc = &compiler; if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1)) @@ -137,7 +137,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) this->qw1 = &x86::rcx; #endif - const std::array vec_vars + const std::array vec_vars { &x86::xmm0, &x86::xmm1, @@ -333,20 +333,20 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) }); c->setExtraReg(x86::k7); - c->z().vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off)); + c->z().vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off)); } else { - c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off)); + c->vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off)); } if (first) { - c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4); + c->vpcmpud(x86::k1, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4); } else { - c->vpcmpud(x86::k3, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4); + c->vpcmpud(x86::k3, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4); c->korw(x86::k1, x86::k3, x86::k1); } @@ -378,15 +378,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) if (cmask == 0xff) { - c->vmovdqa(x86::ymm0, x86::yword_ptr(*ls, starta)); + c->vmovdqa(x86::ymm0, x86::ymmword_ptr(*ls, starta)); } else { c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0); - c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask); + c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask); } - c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code)); + c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code)); c->vptest(x86::ymm0, x86::ymm0); c->jnz(label_diff); @@ -401,9 +401,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) const u32 cmask1 = get_code_mask(starta + 32, enda); c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0); - c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0); - c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1); - c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code)); + c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0); + c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1); + c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code)); c->vptest(x86::ymm0, x86::ymm0); c->jnz(label_diff); @@ -453,21 +453,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) xmm2z = true; } - c->vpblendd(x86::ymm1, x86::ymm2, x86::yword_ptr(*qw1, j - ls_off), cmask); + c->vpblendd(x86::ymm1, x86::ymm2, x86::ymmword_ptr(*qw1, j - ls_off), cmask); } else { - c->vmovdqa32(x86::ymm1, x86::yword_ptr(*qw1, j - ls_off)); + c->vmovdqa32(x86::ymm1, x86::ymmword_ptr(*qw1, j - ls_off)); } // Perform bitwise comparison and accumulate if (first) { - c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off)); + c->vpxor(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off)); } else { - c->vpternlogd(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */); + c->vpternlogd(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */); } for (u32 i = j; i < j + 32; i += 4) @@ -500,15 +500,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) if (cmask == 0xff) { - c->vmovaps(x86::ymm0, x86::yword_ptr(*ls, starta)); + c->vmovaps(x86::ymm0, x86::ymmword_ptr(*ls, starta)); } else { c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0); - c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask); + c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask); } - c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code)); + c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code)); c->vptest(x86::ymm0, x86::ymm0); c->jnz(label_diff); @@ -523,9 +523,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) const u32 cmask1 = get_code_mask(starta + 32, enda); c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0); - c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0); - c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1); - c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code)); + c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0); + c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1); + c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code)); c->vptest(x86::ymm0, x86::ymm0); c->jnz(label_diff); @@ -586,21 +586,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) xmm2z = true; } - c->vblendps(reg1, x86::ymm2, x86::yword_ptr(*ls, j - ls_off), cmask); + c->vblendps(reg1, x86::ymm2, x86::ymmword_ptr(*ls, j - ls_off), cmask); } else { - c->vmovaps(reg1, x86::yword_ptr(*ls, j - ls_off)); + c->vmovaps(reg1, x86::ymmword_ptr(*ls, j - ls_off)); } // Perform bitwise comparison and accumulate if (!order++) { - c->vxorps(reg0, reg1, x86::yword_ptr(x86::rax, code_off)); + c->vxorps(reg0, reg1, x86::ymmword_ptr(x86::rax, code_off)); } else { - c->vxorps(reg1, reg1, x86::yword_ptr(x86::rax, code_off)); + c->vxorps(reg1, reg1, x86::ymmword_ptr(x86::rax, code_off)); c->vorps(reg0, reg1, reg0); } @@ -800,7 +800,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) { if (m_preds.count(pos)) { - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); } c->bind(found->second); @@ -832,7 +832,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) } // Simply return - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(label_stop); c->add(x86::rsp, 0x28); c->ret(); @@ -840,7 +840,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) if (g_cfg.core.spu_verification) { // Dispatch - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(label_diff); c->inc(SPU_OFF_64(block_failure)); c->add(x86::rsp, 0x28); @@ -855,7 +855,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) // Build instruction dispatch table if (instr_table.isValid()) { - c->align(kAlignData, 8); + c->align(AlignMode::kData, 8); c->bind(instr_table); // Get actual instruction table bounds @@ -877,7 +877,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) } } - c->align(kAlignData, words_align); + c->align(AlignMode::kData, words_align); c->bind(label_code); for (u32 d : words) c->dd(d); @@ -893,20 +893,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) xmm_consts.clear(); // Compile and get function address - spu_function_t fn; + spu_function_t fn = reinterpret_cast(m_asmrt._add(&code)); - if (auto err = m_asmrt.add(&fn, &code)) + if (!fn) { - if (err == asmjit::ErrorCode::kErrorNoVirtualMemory) - { - return nullptr; - } - spu_log.fatal("Failed to build a function"); } else { - jit_announce(fn, code.getCodeSize(), fmt::format("spu-b-%s", fmt::base57(be_t(m_hash_start)))); + jit_announce(fn, code.codeSize(), fmt::format("spu-b-%s", fmt::base57(be_t(m_hash_start)))); } // Install compiled function pointer @@ -927,7 +922,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func) { // Add ASMJIT logs fmt::append(log, "Address: %p\n\n", fn); - log += logger.getString(); + log.append(logger._content.data(), logger._content.size()); log += "\n\n\n"; // Append log file @@ -962,7 +957,7 @@ spu_recompiler::XmmLink spu_recompiler::XmmGet(s8 reg, XmmType type) // get xmm return result; } -inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data) +inline asmjit::x86::Mem spu_recompiler::XmmConst(const v128& data) { // Find existing const auto& xmm_label = xmm_consts[std::make_pair(data._u64[0], data._u64[1])]; @@ -973,7 +968,7 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data) consts.emplace_back([=, this] { - c->align(asmjit::kAlignData, 16); + c->align(asmjit::AlignMode::kData, 16); c->bind(xmm_label); c->dq(data._u64[0]); c->dq(data._u64[1]); @@ -983,17 +978,17 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data) return asmjit::x86::oword_ptr(xmm_label); } -inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128& data) +inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128& data) { return XmmConst(v128::fromF(data)); } -inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128i& data) +inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128i& data) { return XmmConst(v128::fromV(data)); } -inline asmjit::X86Mem spu_recompiler::get_pc(u32 addr) +inline asmjit::x86::Mem spu_recompiler::get_pc(u32 addr) { return asmjit::x86::qword_ptr(*pc0, addr - m_base); } @@ -1108,7 +1103,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->mov(SPU_OFF_32(pc), *addr); c->mov(*arg0, *cpu); c->add(x86::rsp, 0x28); - c->jmp(imm_ptr(_throw)); + c->jmp(imm_ptr(+_throw)); // Save addr in srr0 and disable interrupts c->bind(intr); @@ -1123,7 +1118,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) c->test(*addr, 0xff80007f); c->cmovnz(*addr, rip->r32()); c->shr(*addr, 5); - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(no_intr); } @@ -1210,7 +1205,7 @@ void spu_recompiler::branch_set_link(u32 target) after.emplace_back([=, this, target = local->second] { // Clear return info after use - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(ret); c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3)); c->and_(qw1->r32(), 0x3fff0); @@ -1246,9 +1241,9 @@ void spu_recompiler::fall(spu_opcode_t op) c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->mov(arg1->r32(), op.opcode); - c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast(g_spu_interpreter_fast.decode(op.opcode)))); + c->mov(*qw0, asmjit::imm_ptr(g_spu_interpreter_fast.decode(op.opcode))); c->mov(*arg0, *cpu); - c->call(asmjit::imm_ptr(gate)); + c->call(asmjit::imm_ptr(+gate)); } void spu_recompiler::UNK(spu_opcode_t op) @@ -1266,7 +1261,7 @@ void spu_recompiler::UNK(spu_opcode_t op) c->mov(arg1->r32(), op.opcode); c->mov(*arg0, *cpu); c->add(asmjit::x86::rsp, 0x28); - c->jmp(asmjit::imm_ptr(gate)); + c->jmp(asmjit::imm_ptr(+gate)); m_pos = -1; } @@ -1295,7 +1290,7 @@ void spu_recompiler::STOP(spu_opcode_t op) c->mov(arg1->r32(), op.opcode & 0x3fff); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_stop)); - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(ret); c->add(SPU_OFF_32(pc), 4); @@ -1362,14 +1357,14 @@ void spu_recompiler::RDCH(spu_opcode_t op) { using namespace asmjit; - auto read_channel = [&](X86Mem channel_ptr, bool sync = true) + auto read_channel = [&](x86::Mem channel_ptr, bool sync = true) { Label wait = c->newLabel(); Label again = c->newLabel(); Label ret = c->newLabel(); c->mov(addr->r64(), channel_ptr); c->xor_(qw0->r32(), qw0->r32()); - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(again); c->bt(addr->r64(), spu_channel::off_count); c->jnc(wait); @@ -1380,7 +1375,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_rdch)); c->jmp(ret); @@ -1482,13 +1477,12 @@ void spu_recompiler::RDCH(spu_opcode_t op) *_res = v128::from32r(out); }; - using ftype = void (*)(spu_thread*, v128*); c->lea(addr->r64(), get_pc(m_pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); c->lea(*arg1, SPU_OFF_128(gpr, op.rt)); c->mov(*arg0, *cpu); - c->call(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr(sub1) : asmjit::imm_ptr(sub2)); + c->call(asmjit::imm_ptr(g_cfg.core.spu_loop_detection ? +sub1 : +sub2)); return; } case SPU_RdEventMask: @@ -1523,7 +1517,7 @@ void spu_recompiler::RDCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(m_pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_rdch)); c->movd(x86::xmm0, *addr); @@ -1540,7 +1534,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op) { using namespace asmjit; - auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false) + auto ch_cnt = [&](x86::Mem channel_ptr, bool inv = false) { // Load channel count const XmmLink& vr = XmmAlloc(); @@ -1631,7 +1625,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op) c->lea(addr->r64(), get_pc(m_pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_rchcnt)); break; @@ -2320,7 +2314,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) Label ret = c->newLabel(); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox)); - c->align(kAlignCode, 16); + c->align(AlignMode::kCode, 16); c->bind(again); c->mov(qw0->r32(), qw0->r32()); c->bt(addr->r64(), spu_channel::off_count); @@ -2332,7 +2326,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_wrch)); c->jmp(ret); @@ -2359,7 +2353,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->lea(arg1->r32(), MFC_WrTagMask); + c->mov(arg1->r32(), MFC_WrTagMask); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_wrch)); c->jmp(ret); @@ -2383,7 +2377,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_wrch)); c->jmp(ret); @@ -2476,7 +2470,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->btr(SPU_OFF_32(ch_stall_mask), arg1->r32()); c->jnc(ret); c->mov(*arg0, *cpu); - c->call(imm_ptr(sub)); + c->call(imm_ptr(+sub)); c->bind(ret); return; } @@ -2488,7 +2482,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) }; c->mov(*arg0, *cpu); - c->call(imm_ptr(sub)); + c->call(imm_ptr(+sub)); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(SPU_OFF_32(ch_dec_value), qw0->r32()); return; @@ -2515,7 +2509,7 @@ void spu_recompiler::WRCH(spu_opcode_t op) c->lea(addr->r64(), get_pc(m_pos)); c->and_(*addr, 0x3fffc); c->mov(SPU_OFF_32(pc), *addr); - c->mov(arg1->r32(), op.ra); + c->mov(arg1->r32(), +op.ra); c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3)); c->mov(*arg0, *cpu); c->call(imm_ptr(spu_wrch)); @@ -2529,7 +2523,7 @@ void spu_recompiler::BIZ(spu_opcode_t op) after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1] { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); @@ -2545,7 +2539,7 @@ void spu_recompiler::BINZ(spu_opcode_t op) after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1] { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); @@ -2561,7 +2555,7 @@ void spu_recompiler::BIHZ(spu_opcode_t op) after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1] { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); @@ -2577,7 +2571,7 @@ void spu_recompiler::BIHNZ(spu_opcode_t op) after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1] { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); c->and_(*addr, 0x3fffc); @@ -2669,13 +2663,13 @@ void spu_recompiler::BISLED(spu_opcode_t op) asmjit::Label branch_label = c->newLabel(); c->mov(*arg0, *cpu); - c->call(asmjit::imm_ptr(get_events)); + c->call(asmjit::imm_ptr(+get_events)); c->test(*addr, 1); c->jne(branch_label); after.emplace_back([=, this]() { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); c->and_(*addr, 0x3fffc); branch_indirect(op, true, false); @@ -2895,7 +2889,7 @@ void spu_recompiler::CDX(spu_opcode_t op) const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(*qw0, asmjit::imm_u(0x0001020304050607)); + c->mov(*qw0, asmjit::Imm(0x0001020304050607ull)); c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0); } @@ -3028,7 +3022,7 @@ void spu_recompiler::CBD(spu_opcode_t op) //} c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - if (op.i7) c->add(*addr, op.i7); + if (op.i7) c->add(*addr, +op.i7); c->not_(*addr); c->and_(*addr, 0xf); @@ -3052,7 +3046,7 @@ void spu_recompiler::CHD(spu_opcode_t op) //} c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - if (op.i7) c->add(*addr, op.i7); + if (op.i7) c->add(*addr, +op.i7); c->not_(*addr); c->and_(*addr, 0xe); @@ -3076,7 +3070,7 @@ void spu_recompiler::CWD(spu_opcode_t op) //} c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - if (op.i7) c->add(*addr, op.i7); + if (op.i7) c->add(*addr, +op.i7); c->not_(*addr); c->and_(*addr, 0xc); @@ -3100,14 +3094,14 @@ void spu_recompiler::CDD(spu_opcode_t op) //} c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3)); - if (op.i7) c->add(*addr, op.i7); + if (op.i7) c->add(*addr, +op.i7); c->not_(*addr); c->and_(*addr, 0x8); const XmmLink& vr = XmmAlloc(); c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); c->movdqa(SPU_OFF_128(gpr, op.rt), vr); - c->mov(*qw0, asmjit::imm_u(0x0001020304050607)); + c->mov(*qw0, asmjit::Imm(0x0001020304050607ull)); c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0); } @@ -4048,7 +4042,7 @@ void spu_recompiler::BRZ(spu_opcode_t op) after.emplace_back([=, this]() { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); branch_fixed(target); }); @@ -4088,7 +4082,7 @@ void spu_recompiler::BRNZ(spu_opcode_t op) after.emplace_back([=, this]() { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); branch_fixed(target); }); @@ -4109,7 +4103,7 @@ void spu_recompiler::BRHZ(spu_opcode_t op) after.emplace_back([=, this]() { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); branch_fixed(target); }); @@ -4130,7 +4124,7 @@ void spu_recompiler::BRHNZ(spu_opcode_t op) after.emplace_back([=, this]() { - c->align(asmjit::kAlignCode, 16); + c->align(asmjit::AlignMode::kCode, 16); c->bind(branch_label); branch_fixed(target); }); @@ -4459,7 +4453,7 @@ void spu_recompiler::CGTBI(spu_opcode_t op) void spu_recompiler::HGTI(spu_opcode_t op) { - c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), op.si10); + c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), +op.si10); asmjit::Label label = c->newLabel(); asmjit::Label ret = c->newLabel(); @@ -4503,7 +4497,7 @@ void spu_recompiler::CLGTBI(spu_opcode_t op) void spu_recompiler::HLGTI(spu_opcode_t op) { - c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10); + c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10); asmjit::Label label = c->newLabel(); asmjit::Label ret = c->newLabel(); @@ -4565,7 +4559,7 @@ void spu_recompiler::CEQBI(spu_opcode_t op) void spu_recompiler::HEQI(spu_opcode_t op) { - c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10); + c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10); asmjit::Label label = c->newLabel(); asmjit::Label ret = c->newLabel(); @@ -4636,12 +4630,12 @@ void spu_recompiler::SHUFB(spu_opcode_t op) c->vpcmpub(asmjit::x86::k1, vc, XmmConst(_mm_set1_epi8(-0x40)), 5 /* GE */); c->vpxor(vm, vc, XmmConst(_mm_set1_epi8(0xf))); c->setExtraReg(asmjit::x86::k1); - c->z().vblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1} + c->z().vpblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1} c->vpcmpub(asmjit::x86::k2, vm, XmmConst(_mm_set1_epi8(-0x20)), 5 /* GE */); c->vptestmb(asmjit::x86::k1, vm, XmmConst(_mm_set1_epi8(0x10))); c->vpshufb(vt, va, vm); c->setExtraReg(asmjit::x86::k2); - c->z().vblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2} + c->z().vpblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2} c->setExtraReg(asmjit::x86::k1); c->vpshufb(vt, vb, vm); // {k1} c->vpternlogd(vt, va, vc, 0xf6 /* orAxorBC */); diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index 7d5073d02f..f221b33ab0 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -24,23 +24,23 @@ private: u32 m_base; // emitter: - asmjit::X86Assembler* c; + asmjit::x86::Assembler* c; // arguments: - const asmjit::X86Gp* cpu; - const asmjit::X86Gp* ls; - const asmjit::X86Gp* rip; - const asmjit::X86Gp* pc0; + const asmjit::x86::Gp* cpu; + const asmjit::x86::Gp* ls; + const asmjit::x86::Gp* rip; + const asmjit::x86::Gp* pc0; // Native args or temp variables: - const asmjit::X86Gp* arg0; - const asmjit::X86Gp* arg1; - const asmjit::X86Gp* qw0; - const asmjit::X86Gp* qw1; + const asmjit::x86::Gp* arg0; + const asmjit::x86::Gp* arg1; + const asmjit::x86::Gp* qw0; + const asmjit::x86::Gp* qw1; // temporary: - const asmjit::X86Gp* addr; - std::array vec; + const asmjit::x86::Gp* addr; + std::array vec; // workload for the end of function: std::vector> after; @@ -60,10 +60,10 @@ private: class XmmLink { - const asmjit::X86Xmm* m_var; + const asmjit::x86::Xmm* m_var; public: - XmmLink(const asmjit::X86Xmm*& xmm_var) + XmmLink(const asmjit::x86::Xmm*& xmm_var) : m_var(xmm_var) { xmm_var = nullptr; @@ -71,7 +71,7 @@ private: XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators - operator const asmjit::X86Xmm&() const + operator const asmjit::x86::Xmm&() const { return *m_var; } @@ -87,11 +87,11 @@ private: XmmLink XmmAlloc(); XmmLink XmmGet(s8 reg, XmmType type); - asmjit::X86Mem XmmConst(const v128& data); - asmjit::X86Mem XmmConst(const __m128& data); - asmjit::X86Mem XmmConst(const __m128i& data); + asmjit::x86::Mem XmmConst(const v128& data); + asmjit::x86::Mem XmmConst(const __m128& data); + asmjit::x86::Mem XmmConst(const __m128i& data); - asmjit::X86Mem get_pc(u32 addr); + asmjit::x86::Mem get_pc(u32 addr); void branch_fixed(u32 target, bool absolute = false); void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true); void branch_set_link(u32 target); diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 9b413421f1..0d4b72c508 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -43,7 +43,7 @@ namespace asmjit static constexpr spu_opcode_t s_op{}; template - static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t&, bool store = false) + static void build_spu_gpr_load(x86::Assembler& c, x86::Xmm x, const bf_t&, bool store = false) { static_assert(N == 7, "Invalid bitfield"); @@ -87,7 +87,7 @@ namespace asmjit } template - static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t&, bool store = true) + static void build_spu_gpr_store(x86::Assembler& c, x86::Xmm x, const bf_t&, bool store = true) { build_spu_gpr_load(c, x, bf_t{}, store); } @@ -1733,7 +1733,7 @@ bool spu_interpreter::SHUFB(spu_thread& spu, spu_opcode_t op) return true; } -const spu_inter_func_t optimized_shufb = build_function_asm("spu_shufb", [](asmjit::X86Assembler& c, auto& /*args*/) +const spu_inter_func_t optimized_shufb = build_function_asm("spu_shufb", [](asmjit::x86::Assembler& c, auto& /*args*/) { using namespace asmjit; @@ -1793,7 +1793,7 @@ const spu_inter_func_t optimized_shufb = build_function_asm("s c.mov(x86::eax, 1); c.ret(); - c.align(kAlignData, 16); + c.align(AlignMode::kData, 16); c.bind(xc0); c.dq(0xc0c0c0c0c0c0c0c0); c.dq(0xc0c0c0c0c0c0c0c0); diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 0eaf6c76f3..dddb48821f 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -160,7 +160,7 @@ DECLARE(spu_runtime::tr_all) = [] return reinterpret_cast(trptr); }(); -DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway", [](asmjit::X86Assembler& c, auto& args) +DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway", [](asmjit::x86::Assembler& c, auto& args) { // Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape using namespace asmjit; @@ -249,7 +249,7 @@ DECLARE(spu_runtime::g_gateway) = built_function("spu_gateway", c.ret(); }); -DECLARE(spu_runtime::g_escape) = build_function_asm("spu_escape", [](asmjit::X86Assembler& c, auto& args) +DECLARE(spu_runtime::g_escape) = build_function_asm("spu_escape", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -261,7 +261,7 @@ DECLARE(spu_runtime::g_escape) = build_function_asm("spu_e c.ret(); }); -DECLARE(spu_runtime::g_tail_escape) = build_function_asm("spu_tail_escape", [](asmjit::X86Assembler& c, auto& args) +DECLARE(spu_runtime::g_tail_escape) = build_function_asm("spu_tail_escape", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 9e0cf8d653..66d28ded23 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -405,7 +405,7 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) return res; } -const auto spu_putllc_tx = built_function("spu_putllc_tx", [](asmjit::X86Assembler& c, auto& args) +const auto spu_putllc_tx = built_function("spu_putllc_tx", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -455,14 +455,14 @@ const auto spu_putllc_tx = built_function("spu_putlluc_tx", [](asmjit::X86Assembler& c, auto& args) +const auto spu_putlluc_tx = built_function("spu_putlluc_tx", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -694,10 +694,10 @@ const auto spu_putlluc_tx = built_function("spu_getllar_tx", [](asmjit::X86Assembler& c, auto& args) +const auto spu_getllar_tx = built_function("spu_getllar_tx", [](asmjit::x86::Assembler& c, auto& args) { using namespace asmjit; @@ -847,10 +847,10 @@ const auto spu_getllar_tx = built_function - void build_copy_data_swap_u32_avx3(asmjit::X86Assembler& c, std::array& args, const RT& rmask, const RT& rload, const RT& rtest) + void build_copy_data_swap_u32_avx3(asmjit::x86::Assembler& c, std::array& args, const RT& rmask, const RT& rload, const RT& rtest) { using namespace asmjit; @@ -173,26 +173,26 @@ namespace c.and_(args[0], -Size * 4); c.add(args[2].r32(), args[3].r32()); - c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u)); + c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u)); c.vpshufb(rload, rload, rmask); if (Compare) - c.k(x86::k1).z().vpxord(rtest, rload, X86Mem(args[0], 0, Size * 4u)); - c.k(x86::k1).vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload); + c.k(x86::k1).z().vpxord(rtest, rload, x86::Mem(args[0], 0, Size * 4u)); + c.k(x86::k1).vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload); c.lea(args[0], x86::qword_ptr(args[0], Size * 4)); c.lea(args[1], x86::qword_ptr(args[1], Size * 4)); c.sub(args[2].r32(), Size); c.or_(x86::eax, -1); - c.align(kAlignCode, 16); + c.align(AlignMode::kCode, 16); c.bind(loop); c.cmp(args[2].r32(), Size); c.jbe(tail); - c.vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u)); + c.vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u)); c.vpshufb(rload, rload, rmask); if (Compare) - c.vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC - c.vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload); + c.vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC + c.vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload); c.lea(args[0], x86::qword_ptr(args[0], Size * 4)); c.lea(args[1], x86::qword_ptr(args[1], Size * 4)); c.sub(args[2].r32(), Size); @@ -202,11 +202,11 @@ namespace c.shlx(x86::eax, x86::eax, args[2].r32()); c.not_(x86::eax); c.kmovw(x86::k1, x86::eax); - c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u)); + c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u)); c.vpshufb(rload, rload, rmask); if (Compare) - c.k(x86::k1).vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6); - c.k(x86::k1).vmovdqu32(X86Mem(args[0], 0, Size * 4u), rload); + c.k(x86::k1).vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6); + c.k(x86::k1).vmovdqu32(x86::Mem(args[0], 0, Size * 4u), rload); if (Compare) { @@ -230,7 +230,7 @@ namespace } template - void build_copy_data_swap_u32(asmjit::X86Assembler& c, std::array& args) + void build_copy_data_swap_u32(asmjit::x86::Assembler& c, std::array& args) { using namespace asmjit;