diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 80d285f46f..26ad65d79f 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -223,14 +223,7 @@ namespace asmjit template ().*MPtr)> x86::Mem ppu_mem() { - if (ppu_base == 0) - { - return x86::ptr(arg_ppu, static_cast(::offset32(MPtr)), Size); - } - else - { - return x86::ptr(x86::r10, static_cast(::offset32(MPtr) - ppu_base), Size); - } + return x86::ptr(arg_ppu, static_cast(::offset32(MPtr)), Size); } template @@ -316,22 +309,20 @@ struct ppu_abstract_t } } vr; - struct abstract_sat : asmjit::mem_lazy + struct abstract_sat : asmjit::mem_type { - const asmjit::Operand& eval(bool) - { + abstract_sat() #if defined(ARCH_X64) - Operand::operator=(static_cast(g_vc)->ppu_sat()); + : asmjit::mem_type(static_cast(g_vc)->ppu_sat()) #endif - - return *this; + { } template void operator=(T&& _val) const { #if defined(ARCH_X64) - FOR_X64(store_op, kIdMovaps, kIdVmovaps, static_cast(g_vc)->ppu_sat(), std::forward(_val)); + FOR_X64(store_op, kIdMovaps, kIdVmovaps, *this, std::forward(_val)); #endif } } sat{}; diff --git a/rpcs3/util/simd.hpp b/rpcs3/util/simd.hpp index a6095bc050..c3c1b52ff8 100644 --- a/rpcs3/util/simd.hpp +++ b/rpcs3/util/simd.hpp @@ -196,6 +196,11 @@ namespace asmjit return mem.eval(std::is_reference_v); } + inline decltype(auto) arg_eval(const Operand& mem, u32) + { + return mem; + } + inline decltype(auto) arg_eval(Operand& mem, u32) { return mem; @@ -206,6 +211,31 @@ namespace asmjit return std::move(mem); } + template + inline bool arg_use_evex(const auto& op) + { + constexpr auto _class = arg_classify; + if constexpr (_class == arg_class::imm_rv) + return true; + else if constexpr (_class == arg_class::imm_lv) + return false; + else if (op.isMem()) + { + // Check if broadcast is set, or if the offset immediate can use disp8*N encoding + mem_type mem{}; + mem.copyFrom(op); + if (mem.hasBaseLabel()) + return false; + if (mem.hasBroadcast()) + return true; + if (!mem.hasOffset() || mem.offset() % mem.size() || u64(mem.offset() + 128) < 256 || u64(mem.offset() / mem.size() + 128) >= 256) + return false; + return true; + } + + return false; + } + template vec_type unary_op(x86::Inst::Id op, x86::Inst::Id op2, A&& a, Args&&... args) { @@ -259,7 +289,7 @@ namespace asmjit if (utils::has_avx512() && evex_op) { - if (!dst.hasBaseLabel() && dst.hasOffset() && dst.offset() % dst.size() == 0 && dst.offset() / dst.size() + 128 < 256) + if (!dst.hasBaseLabel() && dst.hasOffset() && dst.offset() % dst.size() == 0 && u64(dst.offset() + 128) >= 256 && u64(dst.offset() / dst.size() + 128) < 256) { ensure(!g_vc->evex().emit(evex_op, dst, arg_eval(std::forward(s), 16))); return; @@ -279,7 +309,7 @@ namespace asmjit // Use src1 as a destination src1 = arg_eval(std::forward(a), 16); - if (utils::has_avx512() && evex_op && (arg_classify == arg_class::imm_rv || arg_classify == arg_class::mem_rv || b.isMem())) + if (utils::has_avx512() && evex_op && arg_use_evex(b)) { ensure(!g_vc->evex().emit(evex_op, src1, src1, arg_eval(std::forward(b), esize), std::forward(args)...)); return vec_type{src1.id()}; @@ -322,7 +352,7 @@ namespace asmjit } } - if (utils::has_avx512() && evex_op && (arg_classify == arg_class::imm_rv || arg_classify == arg_class::mem_rv || b.isMem())) + if (utils::has_avx512() && evex_op && arg_use_evex(b)) { ensure(!g_vc->evex().emit(evex_op, src1, vec_type{a.id()}, arg_eval(std::forward(b), esize), std::forward(args)...)); return vec_type{src1.id()}; @@ -352,7 +382,7 @@ namespace asmjit } while (0); - if (utils::has_avx512() && evex_op && (arg_classify == arg_class::imm_rv || arg_classify == arg_class::mem_rv || b.isMem())) + if (utils::has_avx512() && evex_op && arg_use_evex(b)) { ensure(!g_vc->evex().emit(evex_op, src1, src1, arg_eval(std::forward(b), esize), std::forward(args)...)); }