diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index b3236a3031..000e61c0fd 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -7354,6 +7354,39 @@ public: if constexpr (std::extent_v == 4) // u32[4] { + // Match division (adjusted) (TODO) + if (auto a = match_vr(op.ra)) + { + static const auto MT = match(); + + if (auto [div_ok, diva, divb] = match_expr(a, MT / MT); div_ok) + { + if (auto b = match_vr(op.rb)) + { + if (auto [add1_ok] = match_expr(b, bitcast(a) + splat(1)); add1_ok) + { + if (auto [fm_ok, a1, b1] = match_expr(x, bitcast(fm(MT, MT)) > splat(-1)); fm_ok) + { + if (auto [fnma_ok] = match_expr(a1, fnms(divb, bitcast(b), diva)); fnma_ok) + { + if (fabs(b1).eval(m_ir) == fsplat(1.0).eval(m_ir)) + { + set_vr(op.rt4, diva / divb); + return true; + } + + if (auto [sel_ok] = match_expr(b1, bitcast((bitcast(diva) & 0x80000000) | 0x3f800000)); sel_ok) + { + set_vr(op.rt4, diva / divb); + return true; + } + } + } + } + } + } + } + if (auto [a, b] = match_vrs(op.ra, op.rb); a || b) { set_vr(op.rt4, select(x, get_vr(op.rb), get_vr(op.ra))); @@ -8449,7 +8482,42 @@ public: } }); - set_vr(op.rt4, fma(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc))); + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); + + static const auto MT = match(); + + // Match sqrt + if (auto [ok_fnma, a1, b1] = match_expr(a, fnms(MT, MT, fsplat(1.00000011920928955078125))); ok_fnma) + { + if (auto [ok_fm2, a2] = match_expr(b, fm(MT, fsplat(0.5))); ok_fm2 && a2.eq(b1)) + { + if (auto [ok_fm1, a3, b3] = match_expr(c, fm(MT, MT)); ok_fm1 && a3.eq(a1)) + { + if (auto [ok_sqrte, src] = match_expr(a3, spu_rsqrte(MT)); ok_sqrte && src.eq(b3)) + { + erase_stores(a, b, c, a3); + set_vr(op.rt4, fsqrt(fabs(src))); + return; + } + } + } + } + + // Match division (fast) + if (auto [ok_fnma, divb, diva] = match_expr(a, fnms(c, MT, MT)); ok_fnma) + { + if (auto [ok_fm] = match_expr(c, fm(diva, b)); ok_fm) + { + if (auto [ok_re] = match_expr(b, spu_re(divb)); ok_re) + { + erase_stores(b, c); + set_vr(op.rt4, diva / divb); + return; + } + } + } + + set_vr(op.rt4, fma(a, b, c)); } template