PPU LLVM: Fix VMAXFP, VMINFP NaN handling

This commit is contained in:
Eladash 2020-07-30 18:39:45 +03:00 committed by Ani
parent 17f965c171
commit 6a51c27fde
2 changed files with 30 additions and 8 deletions

View File

@@ -2744,22 +2744,44 @@ public:
}
// TODO: Support doubles
// Evaluates `a` against m_ir and emits a call to the "llvm.x86.sse.rcp.ps"
// intrinsic on the result, returning the call wrapped in a value_t.
// The templated form is enabled only when llvm_value_t<T::type> reports
// esize == 32 and is_float.
// NOTE(review): this span appears to interleave the removed and the added lines
// of a diff (two signatures, two `result` declarations) - confirm which of each
// pair is live before editing.
auto fre(value_t<f32[4]> a)
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
auto fre(T a)
{
decltype(a) result;
value_t<typename T::type> result;
const auto av = a.eval(m_ir);
// The operand's own LLVM type is used as both the return and the argument type of the intrinsic.
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", av->getType(), av->getType()).getCallee(), {av});
return result;
}
// Evaluates `a` against m_ir and emits a call to the "llvm.x86.sse.rsqrt.ps"
// intrinsic on the result, returning the call wrapped in a value_t.
// The templated form is enabled only when llvm_value_t<T::type> reports
// esize == 32 and is_float.
// NOTE(review): this span appears to interleave the removed and the added lines
// of a diff (two signatures, two `result` declarations) - confirm which of each
// pair is live before editing.
auto frsqe(value_t<f32[4]> a)
template <typename T, typename = std::enable_if_t<llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
auto frsqe(T a)
{
decltype(a) result;
value_t<typename T::type> result;
const auto av = a.eval(m_ir);
// The operand's own LLVM type is used as both the return and the argument type of the intrinsic.
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", av->getType(), av->getType()).getCallee(), {av});
return result;
}
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
auto fmax(T a, U b)
{
value_t<typename T::type> result;
const auto av = a.eval(m_ir);
const auto bv = b.eval(m_ir);
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.max.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
return result;
}
template <typename T, typename U, typename = std::enable_if_t<std::is_same_v<typename T::type, typename U::type> && llvm_value_t<typename T::type>::esize == 32u && llvm_value_t<typename T::type>::is_float>>
auto fmin(T a, U b)
{
value_t<typename T::type> result;
const auto av = a.eval(m_ir);
const auto bv = b.eval(m_ir);
result.value = m_ir->CreateCall(m_module->getOrInsertFunction("llvm.x86.sse.min.ps", av->getType(), av->getType(), av->getType()).getCallee(), {av, bv});
return result;
}
template <typename T1, typename T2>
value_t<u8[16]> pshufb(T1 a, T2 b)
{

View File

@@ -973,8 +973,8 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
// Translates the VMAXFP opcode: reads the registers named by op.va and op.vb
// and stores a per-element choice between them into the register named by op.vd.
// NOTE(review): this span appears to show both the pre-change pair
// (GetVrs/SetVr with an ordered greater-than compare) and its replacement
// (get_vrs/set_vr) from a diff - confirm which pair is live before editing.
void PPUTranslator::VMAXFP(ppu_opcode_t op)
{
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
// Chooses b where fmin(a, b) is bit-identical to a, otherwise keeps a.
// The equality is on the u32 bit patterns, not a floating-point compare.
// NOTE(review): the result for NaN operands depends on which operand fmin
// returns in that case - verify against the x86 MINPS operand rules.
set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmin(a, b)) == bitcast<u32[4]>(a), b, a)));
}
void PPUTranslator::VMAXSB(ppu_opcode_t op)
@@ -1045,8 +1045,8 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
// Translates the VMINFP opcode: reads the registers named by op.va and op.vb
// and stores a per-element choice between them into the register named by op.vd.
// NOTE(review): this span appears to show both the pre-change pair
// (GetVrs/SetVr with an ordered less-than compare) and its replacement
// (get_vrs/set_vr) from a diff - confirm which pair is live before editing.
void PPUTranslator::VMINFP(ppu_opcode_t op)
{
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
// Chooses b where fmax(a, b) is bit-identical to a, otherwise keeps a.
// The equality is on the u32 bit patterns, not a floating-point compare.
// NOTE(review): the result for NaN operands depends on which operand fmax
// returns in that case - verify against the x86 MAXPS operand rules.
set_vr(op.vd, vec_handle_result(select(bitcast<u32[4]>(fmax(a, b)) == bitcast<u32[4]>(a), b, a)));
}
void PPUTranslator::VMINSB(ppu_opcode_t op)