mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-29 00:33:01 +00:00
SPU LLVM: Use vrangeps in clamp_smax
- This instruction can clamp a value to a range, something which previously needed 2 instructions.
- With the immediate byte set to 0x2, it computes the minimum of the absolute value of the first input and the second input, and then copies the sign of the first input to the result.
This commit is contained in:
parent
17e28ae85d
commit
ebeeafc94f
@ -3666,6 +3666,12 @@ public:
|
||||
{
|
||||
return llvm_calli<u16[8], T, U, llvm_const_int<u32>>{"llvm.x86.avx512.dbpsadbw.128", {std::forward<T>(a), std::forward<U>(b), llvm_const_int<u32>{c}}};
|
||||
}
|
||||
|
||||
// Build a call to the masked 128-bit VRANGEPS intrinsic on two f32[4] operands.
//   a, b - source operands; `a` is also passed as the fourth intrinsic operand
//          (the pass-through value used for lanes whose mask bit is clear).
//   c    - imm8 operation selector (see the Intel VRANGEPS description:
//          bits 1:0 select min/max/abs-min/abs-max, bits 3:2 select the sign source).
//   d    - 8-bit write mask passed as the last intrinsic operand.
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T, U>, f32[4]>>>
static auto vrangeps(T&& a, U&& b, u8 c, u8 d)
{
	// `a` is consumed twice. Elements of a braced initializer are evaluated left to right,
	// so copy it for the first operand and forward it only on its final use; forwarding it
	// twice would read a moved-from object whenever T is deduced as an rvalue.
	return llvm_calli<f32[4], T, U, llvm_const_int<u32>, T, llvm_const_int<u8>>{"llvm.x86.avx512.mask.range.ps.128", {a, std::forward<U>(b), llvm_const_int<u32>{c}, std::forward<T>(a), llvm_const_int<u8>{d}}};
}
|
||||
};
|
||||
|
||||
// Format llvm::SizeType
|
||||
|
@ -7908,6 +7908,22 @@ public:
|
||||
|
||||
// Clamp every lane of `v` so that its magnitude does not exceed the largest finite
// f32 (FLT_MAX, bit pattern 0x7f7fffff) while keeping the lane's sign.
// On AVX-512 targets the general case is a single VRANGEPS; inputs known to be
// positive and constant inputs keep using the clamp_positive_smax /
// clamp_negative_smax helpers, as does the non-AVX-512 fallback.
value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
	if (m_use_avx512)
	{
		if (is_input_positive(v))
		{
			// Sign is already known, only the upper bound needs to be applied
			return eval(clamp_positive_smax(v));
		}

		if (auto [ok, data] = get_const_vector(v.value, m_pos); ok)
		{
			// Avoid pessimation when input is constant
			return eval(clamp_positive_smax(clamp_negative_smax(v)));
		}

		// imm8 = 0x2: min(|v|, bound) with the sign taken from v; mask 0xff writes every lane.
		// The bound must be FLT_MAX as a floating-point VALUE: fsplat is assumed to take a
		// numeric value (verify against its definition), in which case the integer literal
		// 0x7f7fffff would be converted numerically (~2.147e9) instead of being reinterpreted
		// as the FLT_MAX bit pattern.
		return eval(vrangeps(v, fsplat<f32[4]>(0x1.fffffep+127f), 0x2, 0xff));
	}

	return eval(clamp_positive_smax(clamp_negative_smax(v)));
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user