mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 17:11:23 +00:00
PPU LLVM: Fix VMAXFP, VMINFP NaN handling
This commit is contained in:
parent
17f965c171
commit
6a51c27fde
@ -2744,22 +2744,44 @@ public:
|
||||
}
|
||||
|
||||
// TODO: Support doubles
|
||||
auto fre(value_t<f32[4]> a)
|
||||
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||
auto fre(T a)
|
||||
{
|
||||
decltype(a) result;
|
||||
value_t<typename T::type> result;
|
||||
const auto av = a.eval(m_ir);
|
||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av});
|
||||
return result;
|
||||
}
|
||||
|
||||
auto frsqe(value_t<f32[4]> a)
|
||||
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||
auto frsqe(T a)
|
||||
{
|
||||
decltype(a) result;
|
||||
value_t<typename T::type> result;
|
||||
const auto av = a.eval(m_ir);
|
||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||
auto fmax(T a, U b)
|
||||
{
|
||||
value_t<typename T::type> result;
|
||||
const auto av = a.eval(m_ir);
|
||||
const auto bv = b.eval(m_ir);
|
||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.max.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
|
||||
auto fmin(T a, U b)
|
||||
{
|
||||
value_t<typename T::type> result;
|
||||
const auto av = a.eval(m_ir);
|
||||
const auto bv = b.eval(m_ir);
|
||||
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.min.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> pshufb(T1 a, T2 b)
|
||||
{
|
||||
|
@ -973,8 +973,8 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||
|
||||
// Translates VMAXFP: reads va and vb as f32[4] and writes the selected result
// to vd through vec_handle_result.
//
// Per lane: where fmin(a, b) is bit-identical to a, b is taken; otherwise a is
// taken. The comparison is done on the u32[4] bit patterns, not as a float
// compare. This replaces the earlier `select(a > b (ordered), a, b)` form,
// which was changed to fix NaN handling.
// NOTE(review): the exact NaN / signed-zero outcome depends on the operand
// rules of the underlying x86 `min.ps` instruction -- confirm against the
// Intel SDM and the AltiVec vmaxfp definition.
void PPUTranslator::VMAXFP(ppu_opcode_t op)
{
	const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
	set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmin(a, b)) == bitcast<u32[4]>(a), b, a)));
}
|
||||
|
||||
void PPUTranslator::VMAXSB(ppu_opcode_t op)
|
||||
@ -1045,8 +1045,8 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
|
||||
|
||||
// Translates VMINFP: reads va and vb as f32[4] and writes the selected result
// to vd through vec_handle_result.
//
// Per lane: where fmax(a, b) is bit-identical to a, b is taken; otherwise a is
// taken. The comparison is done on the u32[4] bit patterns, not as a float
// compare. This replaces the earlier `select(a < b (ordered), a, b)` form,
// which was changed to fix NaN handling.
// NOTE(review): the exact NaN / signed-zero outcome depends on the operand
// rules of the underlying x86 `max.ps` instruction -- confirm against the
// Intel SDM and the AltiVec vminfp definition.
void PPUTranslator::VMINFP(ppu_opcode_t op)
{
	const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
	set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmax(a, b)) == bitcast<u32[4]>(a), b, a)));
}
|
||||
|
||||
void PPUTranslator::VMINSB(ppu_opcode_t op)
|
||||
|
Loading…
Reference in New Issue
Block a user