SPU: Fix FREST

This commit is contained in:
Eladash 2019-11-15 17:58:41 +02:00 committed by Ivan
parent 9b34f00241
commit c9b0f0e734
3 changed files with 45 additions and 13 deletions

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#ifdef LLVM_AVAILABLE
@ -2729,6 +2729,23 @@ public:
return result;
}
// TODO: Support doubles
// Builds a call to the "llvm.x86.sse.rcp.ps" intrinsic (x86 SSE packed
// single-precision reciprocal approximation) on the evaluated operand `a`.
// The intrinsic is declared in m_module on demand, with the operand's vector
// type used as both its argument type and its return type.
// Returns a value_t<f32[4]> whose `value` is the emitted call instruction.
auto fre(value_t<f32[4]> a)
{
	// Materialize the operand expression in the current IR builder first.
	const auto operand = a.eval(m_ir);
	const auto vec_type = operand->getType();

	// Look up (or declare) the intrinsic: vec_type intrinsic(vec_type).
	const auto callee = m_module->getOrInsertFunction("llvm.x86.sse.rcp.ps", vec_type, vec_type).getCallee();

	value_t<f32[4]> result;
	result.value = m_ir->CreateCall(callee, {operand});
	return result;
}
// Builds a call to the "llvm.x86.sse.rsqrt.ps" intrinsic (x86 SSE packed
// single-precision reciprocal square root approximation) on the evaluated
// operand `a`. The intrinsic is declared in m_module on demand, taking and
// returning the operand's vector type.
// Returns a value_t<f32[4]> whose `value` is the emitted call instruction.
auto frsqe(value_t<f32[4]> a)
{
	// Materialize the operand expression in the current IR builder first.
	const auto operand = a.eval(m_ir);
	const auto vec_type = operand->getType();

	// Look up (or declare) the intrinsic: vec_type intrinsic(vec_type).
	const auto intrinsic = m_module->getOrInsertFunction("llvm.x86.sse.rsqrt.ps", vec_type, vec_type);

	value_t<f32[4]> result;
	result.value = m_ir->CreateCall(intrinsic.getCallee(), {operand});
	return result;
}
template <typename T1, typename T2>
value_t<u8[16]> pshufb(T1 a, T2 b)
{

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "SPUInterpreter.h"
#include "Emu/System.h"
@ -1903,21 +1903,29 @@ inline bool isdenormal(double x)
// FREST handler of the precise SPU interpreter: produces a per-lane reciprocal
// value of register `ra` and stores the four results in register `rt`.
// Always returns true.
//
// NOTE(review): this text comes from a diff view whose +/- markers were lost.
// Lines of the old implementation (scalar `result`, `if`/`else if`/`else`,
// `1 / a`) appear interleaved with lines of the new one (`res`, `switch`,
// `_mm_rcp_ps`), so the body below is not a single compilable version.
// Confirm against the actual file before relying on any one line.
bool spu_interpreter_precise::FREST(spu_thread& spu, spu_opcode_t op)
{
// Set the host floating-point rounding mode to round-toward-zero.
fesetround(FE_TOWARDZERO);
// Take a local copy of the source register.
const auto ra = spu.gpr[op.ra];
// Start from the SSE rcpps approximation of all four lanes; lanes that need
// special treatment are overwritten inside the loop.
auto res = v128::fromF(_mm_rcp_ps(ra.vf));
for (int i = 0; i < 4; i++)
{
const float a = spu.gpr[op.ra]._f[i];
float result;
if (fexpf(a) == 0)
const auto a = ra._f[i];
// Dispatch on fexpf(a) — presumably the lane's raw exponent field, given the
// `>> 23` in the case label below; verify against fexpf's definition.
switch (fexpf(a))
{
case 0:
{
// fexpf(a) == 0: raise the divide-by-zero flag for this lane and store
// extended(sign of a, 0x7FFFFF) as the result.
spu.fpscr.setDivideByZeroFlag(i);
result = extended(std::signbit(a), 0x7FFFFF);
res._f[i] = extended(std::signbit(a), 0x7FFFFF);
break;
}
case (0x7e800000 >> 23): // Special case for value not handled properly in rcpps
{
// Force the result of this lane to zero instead of using the rcpps output.
res._f[i] = 0.0f;
break;
}
// Every other value of fexpf(a) keeps the rcpps result already in `res`.
default: break;
}
else if (isextended(a))
result = 0.0f;
else
result = 1 / a;
spu.gpr[op.rt]._f[i] = result;
}
// Write all four lanes to the destination register at once.
spu.gpr[op.rt] = res;
return true;
}

View File

@ -7216,9 +7216,16 @@ public:
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
set_vr(op.rt, fsplat<f64[4]>(1.0) / get_vr<f64[4]>(op.ra));
{
const auto a = get_vr<f32[4]>(op.ra);
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
const auto mask_de = eval(noncast<u32[4]>(sext<s32[4]>(fcmp_uno(a == fsplat<f32[4]>(0.)))) >> 1);
set_vr(op.rt, (bitcast<s32[4]>(fre(a)) & ~mask_ov) | noncast<s32[4]>(mask_de));
}
else
set_vr(op.rt, fsplat<f32[4]>(1.0) / get_vr<f32[4]>(op.ra));
{
set_vr(op.rt, fre(get_vr<f32[4]>(op.ra)));
}
}
void FRSQEST(spu_opcode_t op)