mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-16 07:20:59 +00:00
SPU LLVM: Use VDBPSADBW in SUMB
- This instruction can be used to sum bytes horizontally if the second input vector is all zeroes.
This commit is contained in:
parent
8b3a3e4ac8
commit
f9ab077908
@ -3366,6 +3366,21 @@ public:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2>
|
||||||
|
value_t<u8[16]> vdbpsadbw(T1 a, T2 b, u8 c)
|
||||||
|
{
|
||||||
|
value_t<u8[16]> result;
|
||||||
|
|
||||||
|
const auto data0 = a.eval(m_ir);
|
||||||
|
const auto data1 = b.eval(m_ir);
|
||||||
|
|
||||||
|
const auto immediate = (llvm_const_int<u32>{c});
|
||||||
|
const auto imm8 = immediate.eval(m_ir);
|
||||||
|
|
||||||
|
result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_dbpsadbw_128), {data0, data1, imm8});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2>
|
template <typename T1, typename T2>
|
||||||
value_t<u8[16]> vpermb(T1 a, T2 b)
|
value_t<u8[16]> vpermb(T1 a, T2 b)
|
||||||
{
|
{
|
||||||
|
@ -7022,6 +7022,23 @@ public:
|
|||||||
|
|
||||||
void SUMB(spu_opcode_t op)
|
void SUMB(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
|
if (m_use_avx512)
|
||||||
|
{
|
||||||
|
const auto [a, b] = get_vrs<u8[16]>(op.ra, op.rb);
|
||||||
|
const auto zeroes = splat<u8[16]>(0);
|
||||||
|
|
||||||
|
if (op.ra == op.rb && !m_interp_magn)
|
||||||
|
{
|
||||||
|
set_vr(op.rt, vdbpsadbw(a, zeroes, 0));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto ax = vdbpsadbw(a, zeroes, 0);
|
||||||
|
const auto bx = vdbpsadbw(b, zeroes, 0);
|
||||||
|
set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (m_use_vnni)
|
if (m_use_vnni)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user