PPU: improve interpreter codegen

Implement more sensible check for EVEX prefix and SSE
This commit is contained in:
Nekotekina 2022-01-18 00:08:41 +03:00
parent 4704367382
commit 4728478247
2 changed files with 40 additions and 19 deletions

View File

@ -223,14 +223,7 @@ namespace asmjit
// Build an x86 memory operand that addresses a member of ppu_thread.
// MPtr selects the member pointer; Size defaults to that member's byte size.
// NOTE(review): this span is a flattened diff — the if/else body below is the
// REMOVED pre-change code and the final unconditional return is the ADDED
// post-change code. Only the last return exists in the committed source.
template <auto MPtr, u32 Size = sizeof(std::declval<ppu_thread&>().*MPtr)>
x86::Mem ppu_mem()
{
// Removed by this commit: dual addressing mode keyed on ppu_base
// (absolute via arg_ppu, or r10-relative with the base folded out).
if (ppu_base == 0)
{
return x86::ptr(arg_ppu, static_cast<s32>(::offset32(MPtr)), Size);
}
else
{
return x86::ptr(x86::r10, static_cast<s32>(::offset32(MPtr) - ppu_base), Size);
}
// Added by this commit: always address relative to the arg_ppu register.
return x86::ptr(arg_ppu, static_cast<s32>(::offset32(MPtr)), Size);
}
template <u32 Size = 16, uint I, uint N>
@ -316,22 +309,20 @@ struct ppu_abstract_t
}
} vr;
// NOTE(review): flattened diff residue — removed (old) and added (new) lines
// are interleaved without +/- markers, so this span is NOT parseable C++ as
// shown. Old design: abstract_sat derived from asmjit::mem_lazy and resolved
// its operand lazily in eval(bool) by re-querying ppu_sat(). New design: it
// derives from asmjit::mem_type, resolves ppu_sat() once in the constructor,
// and operator= stores through *this instead of re-evaluating the operand.
struct abstract_sat : asmjit::mem_lazy // removed line (old base class)
struct abstract_sat : asmjit::mem_type // added line (new base class)
{
const asmjit::Operand& eval(bool) // removed: lazy resolution entry point
{
abstract_sat() // added: operand is resolved eagerly at construction
#if defined(ARCH_X64)
Operand::operator=(static_cast<asmjit::ppu_builder*>(g_vc)->ppu_sat()); // removed
: asmjit::mem_type(static_cast<asmjit::ppu_builder*>(g_vc)->ppu_sat()) // added
#endif
return *this; // removed (old eval return)
{
}
// Store _val into the saturation-flag operand; FOR_X64 picks the legacy
// movaps or AVX vmovaps form — presumably based on host CPU support (the
// FOR_X64 macro body is not visible here — TODO confirm).
template <typename T>
void operator=(T&& _val) const
{
#if defined(ARCH_X64)
FOR_X64(store_op, kIdMovaps, kIdVmovaps, static_cast<asmjit::ppu_builder*>(g_vc)->ppu_sat(), std::forward<T>(_val)); // removed: re-queried the operand
FOR_X64(store_op, kIdMovaps, kIdVmovaps, *this, std::forward<T>(_val)); // added: reuses the cached operand in *this
#endif
}
} sat{};

View File

@ -196,6 +196,11 @@ namespace asmjit
return mem.eval(std::is_reference_v<T>);
}
// Pass-through overload added by this commit: a generic const Operand is
// already materialized, so evaluation just returns it unchanged (the u32
// size hint is unused here).
inline decltype(auto) arg_eval(const Operand& mem, u32)
{
return mem;
}
inline decltype(auto) arg_eval(Operand& mem, u32)
{
return mem;
@ -206,6 +211,31 @@ namespace asmjit
return std::move(mem);
}
// Decide whether emitting the EVEX (AVX-512) form of an instruction is
// worthwhile for operand `op`, whose compile-time category is given by
// arg_classify<T>. Returns true when EVEX is either required (embedded
// broadcast) or shrinks the encoding via compressed disp8*N displacements.
template <typename T>
inline bool arg_use_evex(const auto& op)
{
constexpr auto _class = arg_classify<T>;
if constexpr (_class == arg_class::imm_rv)
return true; // immediate rvalue — always routed to EVEX here; presumably materialized as an EVEX-friendly (broadcastable) constant — TODO confirm
else if constexpr (_class == arg_class::imm_lv)
return false; // immediate lvalue — never EVEX
else if (op.isMem())
{
// Check if broadcast is set, or if the offset immediate can use disp8*N encoding
mem_type mem{};
mem.copyFrom(op);
if (mem.hasBaseLabel())
return false; // label-relative: final displacement unknown at this point
if (mem.hasBroadcast())
return true; // embedded broadcast exists only in the EVEX encoding
// disp8*N test: EVEX scales 8-bit displacements by the operand size N.
// Reject when: no offset; offset not a multiple of N; offset already
// fits a plain disp8 (u64(offset+128) < 256, so EVEX saves nothing);
// or the scaled offset/N still does not fit disp8 (so EVEX also needs
// disp32 and saves nothing). Otherwise EVEX shrinks disp32 -> disp8*N.
if (!mem.hasOffset() || mem.offset() % mem.size() || u64(mem.offset() + 128) < 256 || u64(mem.offset() / mem.size() + 128) >= 256)
return false;
return true;
}
return false; // register operands: no encoding-size benefit from EVEX
}
template <typename A, typename... Args>
vec_type unary_op(x86::Inst::Id op, x86::Inst::Id op2, A&& a, Args&&... args)
{
@ -259,7 +289,7 @@ namespace asmjit
if (utils::has_avx512() && evex_op)
{
if (!dst.hasBaseLabel() && dst.hasOffset() && dst.offset() % dst.size() == 0 && dst.offset() / dst.size() + 128 < 256)
if (!dst.hasBaseLabel() && dst.hasOffset() && dst.offset() % dst.size() == 0 && u64(dst.offset() + 128) >= 256 && u64(dst.offset() / dst.size() + 128) < 256)
{
ensure(!g_vc->evex().emit(evex_op, dst, arg_eval(std::forward<S>(s), 16)));
return;
@ -279,7 +309,7 @@ namespace asmjit
// Use src1 as a destination
src1 = arg_eval(std::forward<A>(a), 16);
if (utils::has_avx512() && evex_op && (arg_classify<B> == arg_class::imm_rv || arg_classify<B> == arg_class::mem_rv || b.isMem()))
if (utils::has_avx512() && evex_op && arg_use_evex<B>(b))
{
ensure(!g_vc->evex().emit(evex_op, src1, src1, arg_eval(std::forward<B>(b), esize), std::forward<Args>(args)...));
return vec_type{src1.id()};
@ -322,7 +352,7 @@ namespace asmjit
}
}
if (utils::has_avx512() && evex_op && (arg_classify<B> == arg_class::imm_rv || arg_classify<B> == arg_class::mem_rv || b.isMem()))
if (utils::has_avx512() && evex_op && arg_use_evex<B>(b))
{
ensure(!g_vc->evex().emit(evex_op, src1, vec_type{a.id()}, arg_eval(std::forward<B>(b), esize), std::forward<Args>(args)...));
return vec_type{src1.id()};
@ -352,7 +382,7 @@ namespace asmjit
}
while (0);
if (utils::has_avx512() && evex_op && (arg_classify<B> == arg_class::imm_rv || arg_classify<B> == arg_class::mem_rv || b.isMem()))
if (utils::has_avx512() && evex_op && arg_use_evex<B>(b))
{
ensure(!g_vc->evex().emit(evex_op, src1, src1, arg_eval(std::forward<B>(b), esize), std::forward<Args>(args)...));
}