SPU LLVM: AVX-512 optimization for CFLTU (#14384)

- Takes advantage of vrangeps and the new float-to-uint conversion instructions from AVX-512
- Down from 6 to 3 instructions

TODO: Find a way to ensure that LLVM actually emits this instruction sequence for CreateFPToUI.
This commit is contained in:
Whatcookie 2023-07-29 02:01:01 -04:00 committed by GitHub
parent dabb2cc9a0
commit fd6829f757
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -9871,6 +9871,15 @@ public:
a = eval(a * s);
value_t<s32[4]> r;
if (m_use_avx512)
{
const auto sc = clamp_smax(a);
r.value = m_ir->CreateFPToUI(sc.value, get_type<s32[4]>());
set_vr(op.rt, r);
return;
}
r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
}