mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-15 13:21:14 +00:00
SPU LLVM: Use VDBPSADBW in SUMB
- This instruction can be used to sum bytes horizontally if the second input vector is all zeroes.
This commit is contained in:
parent
8b3a3e4ac8
commit
f9ab077908
@ -3366,6 +3366,21 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> vdbpsadbw(T1 a, T2 b, u8 c)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto data1 = b.eval(m_ir);
|
||||
|
||||
const auto immediate = (llvm_const_int<u32>{c});
|
||||
const auto imm8 = immediate.eval(m_ir);
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_dbpsadbw_128), {data0, data1, imm8});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> vpermb(T1 a, T2 b)
|
||||
{
|
||||
|
@ -7022,6 +7022,23 @@ public:
|
||||
|
||||
void SUMB(spu_opcode_t op)
|
||||
{
|
||||
if (m_use_avx512)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u8[16]>(op.ra, op.rb);
|
||||
const auto zeroes = splat<u8[16]>(0);
|
||||
|
||||
if (op.ra == op.rb && !m_interp_magn)
|
||||
{
|
||||
set_vr(op.rt, vdbpsadbw(a, zeroes, 0));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto ax = vdbpsadbw(a, zeroes, 0);
|
||||
const auto bx = vdbpsadbw(b, zeroes, 0);
|
||||
set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14));
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_use_vnni)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||
|
Loading…
x
Reference in New Issue
Block a user