mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 12:32:43 +00:00
PPU LLVM: Use VPERM2B to emulate VPERM (#8704)
- The VPERM2B instructions match VPERM's behavior, except that they operate in reverse byte order
This commit is contained in:
parent
0c85d4c0d0
commit
4ce2ad54a8
@ -57,6 +57,15 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
{
|
||||
m_use_fma = true;
|
||||
}
|
||||
|
||||
// Test AVX-512_icelake features (TODO)
|
||||
if (cpu == "icelake" ||
|
||||
cpu == "icelake-client" ||
|
||||
cpu == "icelake-server" ||
|
||||
cpu == "tigerlake")
|
||||
{
|
||||
m_use_avx512_icl = true;
|
||||
}
|
||||
}
|
||||
|
||||
llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type)
|
||||
|
@ -2423,6 +2423,9 @@ protected:
|
||||
// Allow FMA
|
||||
bool m_use_fma = false;
|
||||
|
||||
// Allow Icelake tier AVX-512.
// Off by default; cpu_translator::initialize() turns it on only when the CPU
// name is "icelake", "icelake-client", "icelake-server" or "tigerlake".
// PPUTranslator::VPERM checks it before taking the vperm2b() path.
|
||||
bool m_use_avx512_icl = false;
|
||||
|
||||
// IR builder
|
||||
llvm::IRBuilder<>* m_ir;
|
||||
|
||||
@ -2782,6 +2785,33 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
value_t<u8[16]> vperm2b(T1 a, T2 b, T3 c)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto data1 = b.eval(m_ir);
|
||||
const auto index = c.eval(m_ir);
|
||||
const auto zeros = llvm::ConstantAggregateZero::get(get_type<u8[16]>());
|
||||
|
||||
if (auto c = llvm::dyn_cast<llvm::Constant>(index))
|
||||
{
|
||||
// Convert VPERM2B index back to LLVM vector shuffle mask
|
||||
const auto cv = llvm::dyn_cast<llvm::ConstantDataVector>(c);
|
||||
|
||||
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
|
||||
{
|
||||
result.value = m_ir->CreateZExt(cv, get_type<u32[16]>());
|
||||
result.value = m_ir->CreateShuffleVector(data0, data1, result.value);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_vpermi2var_qi_128), {data0, index, data1});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> pshufb(T1 a, T2 b)
|
||||
{
|
||||
|
@ -1295,6 +1295,14 @@ void PPUTranslator::VPERM(ppu_opcode_t op)
|
||||
const auto a = get_vr<u8[16]>(op.va);
|
||||
const auto b = get_vr<u8[16]>(op.vb);
|
||||
const auto c = get_vr<u8[16]>(op.vc);
|
||||
|
||||
if (m_use_avx512_icl && op.ra != op.rb)
|
||||
{
|
||||
const auto i = eval(~c);
|
||||
set_vr(op.vd, vperm2b(b, a, i));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto i = eval(~c & 0x1f);
|
||||
set_vr(op.vd, select(noncast<s8[16]>(c << 3) >= 0, pshufb(a, i), pshufb(b, i)));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user