SPU LLVM: Use vrangeps in clamp_smax

- This instruction can clamp a value to a range in a single operation, something which previously needed 2 instructions.
- With the immediate byte set to 0x2, it computes the minimum of the absolute values of the first and second inputs, and then copies the sign of the first input to the result.
This commit is contained in:
Malcolm Jestadt 2022-06-05 23:53:07 -04:00 committed by Ivan
parent 17e28ae85d
commit ebeeafc94f
2 changed files with 22 additions and 0 deletions

View File

@ -3666,6 +3666,12 @@ public:
{
return llvm_calli<u16[8], T, U, llvm_const_int<u32>>{"llvm.x86.avx512.dbpsadbw.128", {std::forward<T>(a), std::forward<U>(b), llvm_const_int<u32>{c}}};
}
// Build a call to the masked 128-bit range intrinsic "llvm.x86.avx512.mask.range.ps.128".
// a, b - f32[4] operands (enforced by the enable_if constraint on their common type).
//        a is also supplied as the fourth intrinsic operand
//        (NOTE(review): presumably the pass-through for lanes cleared in the mask - confirm against the intrinsic definition).
// c    - immediate byte selecting the range operation, emitted as a u32 constant.
// d    - lane mask operand, emitted as a u8 constant.
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T, U>, f32[4]>>>
static auto vrangeps(T&& a, U&& b, u8 c, u8 d)
{
	// a is consumed twice. Pass it as an lvalue at the first use and forward it only
	// at the last one, so an rvalue argument is never read after being moved from.
	return llvm_calli<f32[4], T, U, llvm_const_int<u32>, T, llvm_const_int<u8>>{"llvm.x86.avx512.mask.range.ps.128", {a, std::forward<U>(b), llvm_const_int<u32>{c}, std::forward<T>(a), llvm_const_int<u8>{d}}};
}
};
// Format llvm::SizeType

View File

@ -7908,6 +7908,22 @@ public:
// Clamp v on both the positive and the negative side.
// Without AVX-512 this is clamp_positive_smax applied on top of clamp_negative_smax.
// With AVX-512 a single vrangeps is emitted for the general case, with cheaper
// paths for inputs known to be positive and for constant inputs.
value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
	if (m_use_avx512)
	{
		if (is_input_positive(v))
		{
			// Only the positive bound can be exceeded
			return eval(clamp_positive_smax(v));
		}

		if (auto [ok, data] = get_const_vector(v.value, m_pos); ok)
		{
			// Avoid pessimation when input is constant
			return eval(clamp_positive_smax(clamp_negative_smax(v)));
		}

		// Immediate 0x2: min(|v|, |bound|) with the sign taken from v; mask 0xff keeps every lane.
		// The bound is the largest finite f32 (bit pattern 0x7f7fffff). It is written as an exact
		// hex-float literal so that it is passed as a floating-point value instead of the integer
		// 0x7f7fffff, which would be converted numerically (to ~2.1e9) rather than reinterpreted.
		return eval(vrangeps(v, fsplat<f32[4]>(0x1.fffffep+127f), 0x2, 0xff));
	}

	return eval(clamp_positive_smax(clamp_negative_smax(v)));
}