diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 1c86f5288d..b73f865c4f 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -528,14 +528,14 @@ inline v128 vec_handle_denormal(ppu_thread& ppu, v128 a) bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op) { - ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16)); + ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat != v128{}} | (u32{ppu.nj} << 16)); return true; } bool ppu_interpreter::MTVSCR(ppu_thread& ppu, ppu_opcode_t op) { const u32 vscr = ppu.vr[op.vb]._u32[3]; - ppu.sat = (vscr & 1) != 0; + ppu.sat = v128::from32((vscr & 1) != 0); ppu.nj = (vscr & 0x10000) != 0; ppu.jm_mask = ppu.nj ? ppu_inf_u32 : 0x7fff'ffff; return true; @@ -577,12 +577,12 @@ bool ppu_interpreter_precise::VADDSBS(ppu_thread& ppu, ppu_opcode_t op) if (sum < INT8_MIN) { d._s8[i] = INT8_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum > INT8_MAX) { d._s8[i] = INT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -612,12 +612,12 @@ bool ppu_interpreter_precise::VADDSHS(ppu_thread& ppu, ppu_opcode_t op) if (sum < INT16_MIN) { d._s16[i] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum > INT16_MAX) { d._s16[i] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -654,12 +654,12 @@ bool ppu_interpreter_precise::VADDSWS(ppu_thread& ppu, ppu_opcode_t op) if (sum < INT32_MIN) { d._s32[i] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum > INT32_MAX) { d._s32[i] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -695,7 +695,7 @@ bool ppu_interpreter_precise::VADDUBS(ppu_thread& ppu, ppu_opcode_t op) if (sum > UINT8_MAX) { d._u8[i] = UINT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -731,7 +731,7 @@ bool ppu_interpreter_precise::VADDUHS(ppu_thread& ppu, ppu_opcode_t op) if (sum > UINT16_MAX) { d._u16[i] = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -770,7 +770,7 @@ bool ppu_interpreter_precise::VADDUWS(ppu_thread& ppu, ppu_opcode_t op) if (sum > UINT32_MAX) { d._u32[i] = UINT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -988,13 +988,13 @@ bool ppu_interpreter_precise::VCTSXS(ppu_thread& ppu, ppu_opcode_t op) } else { - ppu.sat = true; + ppu.sat._u32[0] = 1; d._s32[i] = sign ? 0x80000000 : 0x7FFFFFFF; } } else if (exp2 > 30) { - ppu.sat = true; + ppu.sat._u32[0] = 1; d._s32[i] = sign ? 0x80000000 : 0x7FFFFFFF; } else if (exp2 < 0) @@ -1041,13 +1041,13 @@ bool ppu_interpreter_precise::VCTUXS(ppu_thread& ppu, ppu_opcode_t op) } else { - ppu.sat = true; + ppu.sat._u32[0] = 1; d._u32[i] = sign ? 0 : 0xFFFFFFFF; } } else if (exp2 > 31) { - ppu.sat = true; + ppu.sat._u32[0] = 1; d._u32[i] = sign ? 0 : 0xFFFFFFFF; } else if (exp2 < 0) @@ -1056,7 +1056,7 @@ bool ppu_interpreter_precise::VCTUXS(ppu_thread& ppu, ppu_opcode_t op) } else if (sign) { - ppu.sat = true; + ppu.sat._u32[0] = 1; d._u32[i] = 0; } else @@ -1180,12 +1180,12 @@ bool ppu_interpreter_precise::VMHADDSHS(ppu_thread& ppu, ppu_opcode_t op) if (sum < INT16_MIN) { d._s16[i] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum > INT16_MAX) { d._s16[i] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1229,12 +1229,12 @@ bool ppu_interpreter_precise::VMHRADDSHS(ppu_thread& ppu, ppu_opcode_t op) if (sum < INT16_MIN) { d._s16[i] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum > INT16_MAX) { d._s16[i] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1420,12 +1420,12 @@ bool ppu_interpreter_precise::VMSUMSHS(ppu_thread& ppu, ppu_opcode_t op) if (result > 0x7fffffff) { saturated = 0x7fffffff; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result < INT32_MIN) { saturated = 0x80000000; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else saturated = static_cast(result); @@ -1517,7 +1517,7 @@ bool ppu_interpreter_precise::VMSUMUHS(ppu_thread& ppu, ppu_opcode_t op) if (result > 0xffffffffu) { saturated = 0xffffffff; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else saturated = static_cast(result); @@ -1666,12 +1666,12 @@ bool ppu_interpreter_precise::VPKSHSS(ppu_thread& ppu, ppu_opcode_t op) if (result < INT8_MIN) { d._s8[i + 8] = INT8_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result > INT8_MAX) { d._s8[i + 8] = INT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1683,12 +1683,12 @@ bool ppu_interpreter_precise::VPKSHSS(ppu_thread& ppu, ppu_opcode_t op) if (result < INT8_MIN) { d._s8[i] = INT8_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result > INT8_MAX) { d._s8[i] = INT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1717,7 +1717,7 @@ bool ppu_interpreter_precise::VPKSHUS(ppu_thread& ppu, ppu_opcode_t op) const auto all_bits = a | b; if ((all_bits._u64[0] | all_bits._u64[1]) & mask) { - ppu.sat = true; + ppu.sat._u32[0] = 1; } } @@ -1744,12 +1744,12 @@ bool ppu_interpreter_precise::VPKSWSS(ppu_thread& ppu, ppu_opcode_t op) if (result < INT16_MIN) { d._s16[i + 4] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result > INT16_MAX) { d._s16[i + 4] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1761,12 +1761,12 @@ bool ppu_interpreter_precise::VPKSWSS(ppu_thread& ppu, ppu_opcode_t op) if (result < INT16_MIN) { d._s16[i] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result > INT16_MAX) { d._s16[i] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -1828,12 +1828,12 @@ bool ppu_interpreter_precise::VPKSWUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT16_MAX) { result = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result < 0) { result = 0; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u16[h + 4] = result; @@ -1843,12 +1843,12 @@ bool ppu_interpreter_precise::VPKSWUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT16_MAX) { result = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result < 0) { result = 0; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u16[h] = result; @@ -1909,7 +1909,7 @@ bool ppu_interpreter_precise::VPKUHUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT8_MAX) { result = UINT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u8[b + 8] = static_cast(result); @@ -1919,7 +1919,7 @@ bool ppu_interpreter_precise::VPKUHUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT8_MAX) { result = UINT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u8[b] = static_cast(result); @@ -1980,7 +1980,7 @@ bool ppu_interpreter_precise::VPKUWUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT16_MAX) { result = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u16[h + 4] = result; @@ -1990,7 +1990,7 @@ bool ppu_interpreter_precise::VPKUWUS(ppu_thread& ppu, ppu_opcode_t op) if (result > UINT16_MAX) { result = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } d._u16[h] = result; @@ -2430,12 +2430,12 @@ bool ppu_interpreter_precise::VSUBSBS(ppu_thread& ppu, ppu_opcode_t op) if (diff < INT8_MIN) { d._s8[i] = INT8_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (diff > INT8_MAX) { d._s8[i] = INT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -2465,12 +2465,12 @@ bool ppu_interpreter_precise::VSUBSHS(ppu_thread& ppu, ppu_opcode_t op) if (diff < INT16_MIN) { d._s16[i] = INT16_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (diff > INT16_MAX) { d._s16[i] = INT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -2518,12 +2518,12 @@ bool ppu_interpreter_precise::VSUBSWS(ppu_thread& ppu, ppu_opcode_t op) if (result < INT32_MIN) { d._s32[w] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (result > INT32_MAX) { d._s32[w] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._s32[w] = static_cast(result); @@ -2556,12 +2556,12 @@ bool ppu_interpreter_precise::VSUBUBS(ppu_thread& ppu, ppu_opcode_t op) if (diff < 0) { d._u8[i] = 0; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (diff > UINT8_MAX) { d._u8[i] = UINT8_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -2597,12 +2597,12 @@ bool ppu_interpreter_precise::VSUBUHS(ppu_thread& ppu, ppu_opcode_t op) if (diff < 0) { d._u16[i] = 0; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (diff > UINT16_MAX) { d._u16[i] = UINT16_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else { @@ -2652,7 +2652,7 @@ bool ppu_interpreter_precise::VSUBUWS(ppu_thread& ppu, ppu_opcode_t op) if (result < 0) { d._u32[w] = 0; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._u32[w] = static_cast(result); @@ -2704,12 +2704,12 @@ bool ppu_interpreter_precise::VSUMSWS(ppu_thread& ppu, ppu_opcode_t op) if (sum > INT32_MAX) { d._s32[0] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum < INT32_MIN) { d._s32[0] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._s32[0] = static_cast(sum); @@ -2756,12 +2756,12 @@ bool ppu_interpreter_precise::VSUM2SWS(ppu_thread& ppu, ppu_opcode_t op) if (sum > INT32_MAX) { d._s32[n * 2] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum < INT32_MIN) { d._s32[n * 2] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._s32[n * 2] = static_cast(sum); @@ -2820,12 +2820,12 @@ bool ppu_interpreter_precise::VSUM4SBS(ppu_thread& ppu, ppu_opcode_t op) if (sum > INT32_MAX) { d._s32[w] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum < INT32_MIN) { d._s32[w] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._s32[w] = static_cast(sum); @@ -2880,12 +2880,12 @@ bool ppu_interpreter_precise::VSUM4SHS(ppu_thread& ppu, ppu_opcode_t op) if (sum > INT32_MAX) { d._s32[w] = INT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else if (sum < INT32_MIN) { d._s32[w] = INT32_MIN; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._s32[w] = static_cast(sum); @@ -2936,7 +2936,7 @@ bool ppu_interpreter_precise::VSUM4UBS(ppu_thread& ppu, ppu_opcode_t op) if (sum > UINT32_MAX) { d._u32[w] = UINT32_MAX; - ppu.sat = true; + ppu.sat._u32[0] = 1; } else d._u32[w] = static_cast(sum); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index ebcd04cd13..75dad5648d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -65,8 +65,6 @@ #include "util/v128sse.hpp" #include "util/sysinfo.hpp" -const bool s_use_ssse3 = utils::has_ssse3(); - extern atomic_t g_watchdog_hold_ctr; // Should be of the same type diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 74432179c2..324e55804d 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -222,27 +222,6 @@ public: } xer; - /* - Saturation. A sticky status bit indicating that some field in a saturating instruction saturated since the last - time SAT was cleared. In other words when SAT = '1' it remains set to '1' until it is cleared to '0' by an - mtvscr instruction. - 1 The vector saturate instruction implicitly sets when saturation has occurred on the results one of - the vector instructions having saturate in its name: - Move To VSCR (mtvscr) - Vector Add Integer with Saturation (vaddubs, vadduhs, vadduws, vaddsbs, vaddshs, - vaddsws) - Vector Subtract Integer with Saturation (vsububs, vsubuhs, vsubuws, vsubsbs, vsubshs, - vsubsws) - Vector Multiply-Add Integer with Saturation (vmhaddshs, vmhraddshs) - Vector Multiply-Sum with Saturation (vmsumuhs, vmsumshs, vsumsws) - Vector Sum-Across with Saturation (vsumsws, vsum2sws, vsum4sbs, vsum4shs, - vsum4ubs) - Vector Pack with Saturation (vpkuhus, vpkuwus, vpkshus, vpkswus, vpkshss, vpkswss) - Vector Convert to Fixed-Point with Saturation (vctuxs, vctsxs) - 0 Indicates no saturation occurred; mtvscr can explicitly clear this bit. - */ - bool sat{}; - /* Non-Java. A mode control bit that determines whether vector floating-point operations will be performed in a Java-IEEE-C9X-compliant mode or a possibly faster non-Java/non-IEEE mode. @@ -255,6 +234,9 @@ public: */ bool nj = true; + // Sticky saturation bit + v128 sat{}; + // Optimization: precomputed java-mode mask for handling denormals u32 jm_mask = 0x7f80'0000; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 62392ee844..fab98af4cb 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -42,8 +42,9 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo thread_struct.insert(thread_struct.end(), 2, GetType()); // vrsave, cia thread_struct.insert(thread_struct.end(), 3, GetType()); // so, ov, ca thread_struct.insert(thread_struct.end(), 1, GetType()); // cnt - thread_struct.insert(thread_struct.end(), 2, GetType()); // sat, nj - thread_struct.emplace_back(ArrayType::get(GetType(), 2)); // Padding + thread_struct.insert(thread_struct.end(), 1, GetType()); // nj + thread_struct.emplace_back(ArrayType::get(GetType(), 3)); // Padding + thread_struct.insert(thread_struct.end(), 1, GetType()); // sat thread_struct.insert(thread_struct.end(), 1, GetType()); // jm_mask m_thread_type = StructType::create(m_context, thread_struct, "context_t"); @@ -639,7 +640,7 @@ void PPUTranslator::CompilationError(const std::string& error) void PPUTranslator::MFVSCR(ppu_opcode_t op) { - const auto vscr = m_ir->CreateOr(ZExt(RegLoad(m_sat), GetType()), m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType()), 16)); + const auto vscr = m_ir->CreateOr(ZExt(IsNotZero(RegLoad(m_sat)), GetType()), m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType()), 16)); SetVr(op.vd, m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType()), vscr, m_ir->getInt32(m_is_be ? 3 : 0))); } @@ -649,7 +650,7 @@ void PPUTranslator::MTVSCR(ppu_opcode_t op) const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType()); RegStore(nj, m_nj); if (g_cfg.core.llvm_ppu_jm_handling) RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask); - RegStore(Trunc(vscr, GetType()), m_sat); + RegStore(m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType()), m_ir->CreateAnd(vscr, 1), m_ir->getInt32(0)), m_sat); } void PPUTranslator::VADDCUW(ppu_opcode_t op) @@ -669,7 +670,7 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VADDSHS(ppu_opcode_t op) @@ -677,7 +678,7 @@ void PPUTranslator::VADDSHS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VADDSWS(ppu_opcode_t op) @@ -685,7 +686,7 @@ void PPUTranslator::VADDSWS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VADDUBM(ppu_opcode_t op) @@ -699,7 +700,7 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VADDUHM(ppu_opcode_t op) @@ -713,7 +714,7 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VADDUWM(ppu_opcode_t op) @@ -727,7 +728,7 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = add_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a + b)).value)); + set_sat(r ^ (a + b)); } void PPUTranslator::VAND(ppu_opcode_t op) @@ -906,7 +907,7 @@ void PPUTranslator::VCTSXS(ppu_opcode_t op) const auto sat_h = fcmp_ord(scaled >= fsplat(std::pow(2, 31))); const auto converted = fpcast(select(sat_l, const1, scaled)); set_vr(op.vd, select(sat_h, splat(0x7fff'ffff), converted)); - SetSat(IsNotZero(eval(sat_l | sat_h).value)); + set_sat(sext(sat_l) | sext(sat_h)); } void PPUTranslator::VCTUXS(ppu_opcode_t op) @@ -919,7 +920,7 @@ void PPUTranslator::VCTUXS(ppu_opcode_t op) const auto sat_h = fcmp_ord(scaled >= fsplat(std::pow(2, 32))); // TODO ??? const auto converted = fpcast(select(sat_l, const0, scaled)); set_vr(op.vd, select(sat_h, splat(0xffff'ffff), converted)); - SetSat(IsNotZero(eval(sat_l | sat_h).value)); + set_sat(sext(sat_l) | sext(sat_h)); } void PPUTranslator::VEXPTEFP(ppu_opcode_t op) @@ -1038,7 +1039,7 @@ void PPUTranslator::VMHADDSHS(ppu_opcode_t op) const auto m = ((sext(a) * sext(b)) >> 15) + sext(c); const auto r = trunc(min(max(m, splat(-0x8000)), splat(0x7fff))); set_vr(op.vd, r); - SetSat(IsNotZero(eval((m + 0x8000) >> 16).value)); + set_sat(trunc((m + 0x8000) >> 16)); } void PPUTranslator::VMHRADDSHS(ppu_opcode_t op) @@ -1048,7 +1049,7 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op) const auto m = ((sext(a) * sext(b) + splat(0x4000)) >> 15) + sext(c); const auto r = trunc(min(max(m, splat(-0x8000)), splat(0x7fff))); set_vr(op.vd, r); - SetSat(IsNotZero(eval((m + 0x8000) >> 16).value)); + set_sat(trunc((m + 0x8000) >> 16)); } void PPUTranslator::VMINFP(ppu_opcode_t op) @@ -1164,7 +1165,7 @@ void PPUTranslator::VMSUMSHS(ppu_opcode_t op) const auto mx = eval(m ^ sext(m == 0x80000000u)); const auto x = eval(((mx ^ s) & ~(c ^ mx)) >> 31); set_vr(op.vd, eval((z & x) | (s & ~x))); - SetSat(IsNotZero(x.value)); + set_sat(x); } void PPUTranslator::VMSUMUBM(ppu_opcode_t op) @@ -1193,7 +1194,7 @@ void PPUTranslator::VMSUMUHS(ppu_opcode_t op) const auto s2 = eval(s + c); const auto x = eval((s < ml) | (s2 < s)); set_vr(op.vd, select(x, splat(-1), s2)); - SetSat(IsNotZero(x.value)); + set_sat(x); } void PPUTranslator::VMULESB(ppu_opcode_t op) @@ -1333,7 +1334,7 @@ void PPUTranslator::VPKSHSS(ppu_opcode_t op) const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto r = trunc(min(max(ab, splat(-0x80)), splat(0x7f))); set_vr(op.vd, r); - SetSat(IsNotZero(eval(((a + 0x80) | (b + 0x80)) >> 8).value)); + set_sat(((a + 0x80) | (b + 0x80)) >> 8); } void PPUTranslator::VPKSHUS(ppu_opcode_t op) @@ -1343,7 +1344,7 @@ void PPUTranslator::VPKSHUS(ppu_opcode_t op) const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const auto r = trunc(min(max(ab, splat(0)), splat(0xff))); set_vr(op.vd, r); - SetSat(IsNotZero(eval((a | b) >> 8).value)); + set_sat((a | b) >> 8); } void PPUTranslator::VPKSWSS(ppu_opcode_t op) @@ -1353,7 +1354,7 @@ void PPUTranslator::VPKSWSS(ppu_opcode_t op) const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7); const auto r = trunc(min(max(ab, splat(-0x8000)), splat(0x7fff))); set_vr(op.vd, r); - SetSat(IsNotZero(eval(((a + 0x8000) | (b + 0x8000)) >> 16).value)); + set_sat(((a + 0x8000) | (b + 0x8000)) >> 16); } void PPUTranslator::VPKSWUS(ppu_opcode_t op) @@ -1363,7 +1364,7 @@ void PPUTranslator::VPKSWUS(ppu_opcode_t op) const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7); const auto r = trunc(min(max(ab, splat(0)), splat(0xffff))); set_vr(op.vd, r); - SetSat(IsNotZero(eval((a | b) >> 16).value)); + set_sat((a | b) >> 16); } void PPUTranslator::VPKUHUM(ppu_opcode_t op) @@ -1380,7 +1381,7 @@ void PPUTranslator::VPKUHUS(ppu_opcode_t op) const auto tb = bitcast(min(b, splat(0xff))); const auto r = shuffle2(tb, ta, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); set_vr(op.vd, r); - SetSat(IsNotZero(eval((a | b) >> 8).value)); + set_sat((a | b) >> 8); } void PPUTranslator::VPKUWUM(ppu_opcode_t op) @@ -1397,7 +1398,7 @@ void PPUTranslator::VPKUWUS(ppu_opcode_t op) const auto tb = bitcast(min(b, splat(0xffff))); const auto r = shuffle2(tb, ta, 0, 2, 4, 6, 8, 10, 12, 14); set_vr(op.vd, r); - SetSat(IsNotZero(eval((a | b) >> 16).value)); + set_sat((a | b) >> 16); } void PPUTranslator::VREFP(ppu_opcode_t op) @@ -1670,7 +1671,7 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUBSHS(ppu_opcode_t op) @@ -1678,7 +1679,7 @@ void PPUTranslator::VSUBSHS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUBSWS(ppu_opcode_t op) @@ -1686,7 +1687,7 @@ void PPUTranslator::VSUBSWS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUBUBM(ppu_opcode_t op) @@ -1700,7 +1701,7 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUBUHM(ppu_opcode_t op) @@ -1714,7 +1715,7 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUBUWM(ppu_opcode_t op) @@ -1728,7 +1729,7 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op) const auto [a, b] = get_vrs(op.va, op.vb); const auto r = sub_sat(a, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (a - b)).value)); + set_sat(r ^ (a - b)); } void PPUTranslator::VSUMSWS(ppu_opcode_t op) @@ -1740,7 +1741,7 @@ void PPUTranslator::VSUMSWS(ppu_opcode_t op) const auto s = eval(x + y + z); const auto r = min(max(zshuffle(s, 0, 2) + zshuffle(s, 1, 2), splat(-0x8000'0000ll)), splat(0x7fff'ffff)); set_vr(op.vd, zshuffle(bitcast(r), 0, 4, 4, 4)); - SetSat(IsNotZero(eval((r + 0x8000'0000) >> 32).value)); + set_sat((r + 0x8000'0000) >> 32); } void PPUTranslator::VSUM2SWS(ppu_opcode_t op) @@ -1751,7 +1752,7 @@ void PPUTranslator::VSUM2SWS(ppu_opcode_t op) const auto z = b >> 32; const auto r = min(max(x + y + z, splat(-0x8000'0000ll)), splat(0x7fff'ffff)); set_vr(op.vd, zshuffle(bitcast(r), 0, 4, 2, 4)); - SetSat(IsNotZero(eval((r + 0x8000'0000) >> 32).value)); + set_sat((r + 0x8000'0000) >> 32); } void PPUTranslator::VSUM4SBS(ppu_opcode_t op) @@ -1765,7 +1766,7 @@ void PPUTranslator::VSUM4SBS(ppu_opcode_t op) const auto s = eval(x + y + z + w); // Can't overflow const auto r = add_sat(s, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (s + b)).value)); + set_sat(r ^ (s + b)); } void PPUTranslator::VSUM4SHS(ppu_opcode_t op) @@ -1777,7 +1778,7 @@ void PPUTranslator::VSUM4SHS(ppu_opcode_t op) const auto s = eval(x + y); // Can't overflow const auto r = add_sat(s, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (s + b)).value)); + set_sat(r ^ (s + b)); } void PPUTranslator::VSUM4UBS(ppu_opcode_t op) @@ -1791,7 +1792,7 @@ void PPUTranslator::VSUM4UBS(ppu_opcode_t op) const auto s = eval(x + y + z + w); // Can't overflow const auto r = add_sat(s, b); set_vr(op.vd, r); - SetSat(IsNotZero(eval(r != (s + b)).value)); + set_sat(r ^ (s + b)); } #define UNPACK_PIXEL_OP(px) (px & 0xff00001f) | ((px << 6) & 0x1f0000) | ((px << 3) & 0x1f00) @@ -4813,14 +4814,6 @@ void PPUTranslator::SetOverflow(Value* bit) RegStore(m_ir->CreateOr(RegLoad(m_so), bit), m_so); } -void PPUTranslator::SetSat(Value* bit) -{ - if (m_attr & ppu_attr::has_mfvscr) - { - RegStore(m_ir->CreateOr(RegLoad(m_sat), bit), m_sat); - } -} - Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) { Value* trap_condition = m_ir->getFalse(); diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index f972120b31..b1d9c062c2 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -79,8 +79,8 @@ class PPUTranslator final : public cpu_translator DEF_VALUE(m_ov, m_g_ov, 168) // XER.OV bit, overflow flag DEF_VALUE(m_ca, m_g_ca, 169) // XER.CA bit, carry flag DEF_VALUE(m_cnt, m_g_cnt, 170) // XER.CNT - DEF_VALUE(m_sat, m_g_sat, 171) // VSCR.SAT bit, sticky saturation flag - DEF_VALUE(m_nj, m_g_nj, 172) // VSCR.NJ bit, non-Java mode + DEF_VALUE(m_nj, m_g_nj, 171) // VSCR.NJ bit, non-Java mode + DEF_VALUE(m_sat, m_g_sat, 173) // VSCR.SAT bit, sticky saturation flag DEF_VALUE(m_jm_mask, m_g_jm_mask, 174) // Java-Mode helper mask #undef DEF_VALUE @@ -118,6 +118,17 @@ public: return result; } + // Update sticky VSCR.SAT bit (|=) + template + void set_sat(T&& expr) + { + if (m_attr & ppu_attr::has_mfvscr) + { + const auto val = expr.eval(m_ir); + RegStore(m_ir->CreateOr(m_ir->CreateBitCast(RegLoad(m_sat), val->getType()), val), m_sat); + } + } + // Get current instruction address llvm::Value* GetAddr(u64 _add = 0); @@ -265,9 +276,6 @@ public: // Set XER.OV bit, and update XER.SO bit (|=) void SetOverflow(llvm::Value*); - // Update sticky VSCR.SAT bit (|=) - void SetSat(llvm::Value*); - // Check condition for trap instructions llvm::Value* CheckTrapCondition(u32 to, llvm::Value* left, llvm::Value* right);