Revert FM(a*re_accurate) shortcut to relaxed

This commit is contained in:
RipleyTom 2024-02-13 18:07:26 +01:00 committed by Elad.Ash
parent 50b1402d66
commit af33c0c69f

View File

@ -5620,32 +5620,37 @@ public:
const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);
auto full_fm_accurate = [&](const auto& a, const auto& div)
// This causes issues in LBP 1(first platform on first temple level doesn't come down when grabbed)
// Presumably 1/x might result in Zero/NaN when a/x doesn't
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
const auto div_result = a / div;
const auto result_and = bitcast<u32[4]>(div_result) & 0x7fffffffu;
const auto result_cmp_inf = sext<s32[4]>(result_and == splat<u32[4]>(0x7F800000u));
const auto result_cmp_nan = sext<s32[4]>(result_and <= splat<u32[4]>(0x7F800000u));
auto full_fm_accurate = [&](const auto& a, const auto& div)
{
const auto div_result = a / div;
const auto result_and = bitcast<u32[4]>(div_result) & 0x7fffffffu;
const auto result_cmp_inf = sext<s32[4]>(result_and == splat<u32[4]>(0x7F800000u));
const auto result_cmp_nan = sext<s32[4]>(result_and <= splat<u32[4]>(0x7F800000u));
const auto and_mask = bitcast<u32[4]>(result_cmp_nan) & splat<u32[4]>(0xFFFFFFFFu);
const auto or_mask = bitcast<u32[4]>(result_cmp_inf) & splat<u32[4]>(0xFFFFFFFu);
set_vr(op.rt, bitcast<f32[4]>((bitcast<u32[4]>(div_result) & and_mask) | or_mask));
};
const auto and_mask = bitcast<u32[4]>(result_cmp_nan) & splat<u32[4]>(0xFFFFFFFFu);
const auto or_mask = bitcast<u32[4]>(result_cmp_inf) & splat<u32[4]>(0xFFFFFFFu);
set_vr(op.rt, bitcast<f32[4]>((bitcast<u32[4]>(div_result) & and_mask) | or_mask));
};
// FM(a, re_accurate(div))
if (const auto [ok_re_acc, div, one] = match_expr(b, re_accurate(match<f32[4]>(), match<f32[4]>())); ok_re_acc)
{
full_fm_accurate(a, div);
erase_stores(one, b);
return;
}
// FM(a, re_accurate(div))
if (const auto [ok_re_acc, div, one] = match_expr(b, re_accurate(match<f32[4]>(), match<f32[4]>())); ok_re_acc)
{
full_fm_accurate(a, div);
erase_stores(one, b);
return;
}
// FM(re_accurate(div), b)
if (const auto [ok_re_acc, div, one] = match_expr(a, re_accurate(match<f32[4]>(), match<f32[4]>())); ok_re_acc)
{
full_fm_accurate(b, div);
erase_stores(one, a);
return;
// FM(re_accurate(div), b)
if (const auto [ok_re_acc, div, one] = match_expr(a, re_accurate(match<f32[4]>(), match<f32[4]>())); ok_re_acc)
{
full_fm_accurate(b, div);
erase_stores(one, a);
return;
}
}
set_vr(op.rt, fm(a, b));