// Review notes (preamble text ahead of the `diff --git` header; not part of any hunk).
//
// Purpose: patch to rpcs3/Emu/Cell/SPURecompiler.cpp that changes how four registered
// intrinsics -- "spu_frest", "spu_frsqest", "spu_re" and "spu_rsqrte" -- build their estimate.
//
// Before: the exact quotient (1.0 / a, or 1.0 / fsqrt(fabs(a))) was computed first and a
//         penalty of 0x1p-13f times that quotient was subtracted, i.e. quotient * (1 - 2^-13).
// After:  a single division with a pre-scaled numerator: 0.999875069f for the reciprocal
//         forms (spu_frest, spu_re) and 0.999763668f for the reciprocal-square-root forms
//         (spu_frsqest, spu_rsqrte). The separate multiply and subtract are dropped, and the
//         two families no longer share the same scale factor.
//
// In the spu_frest / spu_frsqest hunks the result's bit pattern is additionally ANDed with
// 0xFFFFF800 before being returned; the spu_re / spu_rsqrte hunk returns the value unmasked.
//
// NOTE(review): the unchanged context comment says "Zeroes the last 11 bytes of the mantissa",
//   but the mask 0xFFFFF800 clears the low 11 *bits*. Because that line is hunk context, fix
//   the wording in the target file in a separate change rather than by editing this patch.
// NOTE(review): the added comments justify the constants only as "within one newton-raphson
//   iteration"; the derivation of 0.999875069f and 0.999763668f is not shown here -- confirm
//   against hardware measurements or the originating discussion before relying on them.
// NOTE(review): in this copy the patch's line structure is collapsed (header and all three
//   hunks run together), so it is unlikely to apply as-is. Calls such as value(...),
//   bitcast(...) and splat(...) also appear without template arguments -- presumably lost in
//   the same extraction step; verify against the upstream commit (index 86655189e4..6be29bc75d).
diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 86655189e4..6be29bc75d 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -8865,11 +8865,10 @@ public: register_intrinsic("spu_frest", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); - const auto acc_result = fsplat(1.0) / a; - // Determines accuracy penalty, frest result is always slightly closer to 0 than actual value and provides ~12 bits accuracy - const auto acc_penalty = fsplat(0x1p-13f) * acc_result; + // Gives accuracy penalty, frest result is within one newton-raphson iteration for accuracy + const auto approx_result = fsplat(0.999875069f) / a; // Zeroes the last 11 bytes of the mantissa so FI calculations end up correct if needed - return bitcast(bitcast(acc_result - acc_penalty) & splat(0xFFFFF800)); + return bitcast(bitcast(approx_result) & splat(0xFFFFF800)); }); } else @@ -8905,11 +8904,10 @@ public: register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); - const auto acc_result = fsplat(1.0) / fsqrt(fabs(a)); - // Determines accuracy penalty, frsqest result is always slightly closer to 0 than actual value and provides ~12 bits accuracy - const auto acc_penalty = fsplat(0x1p-13f) * acc_result; + // Gives accuracy penalty, frsqest result is within one newton-raphson iteration for accuracy + const auto approx_result = fsplat(0.999763668f) / fsqrt(fabs(a)); // Zeroes the last 11 bytes of the mantissa so FI calculations end up correct if needed - return bitcast(bitcast(acc_result - acc_penalty) & splat(0xFFFFF800)); + return bitcast(bitcast(approx_result) & splat(0xFFFFF800)); }); } else @@ -9648,19 +9646,17 @@ public: register_intrinsic("spu_re", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); - const auto acc_result = fsplat(1.0) / a; - // Determines accuracy penalty, frest result is always slightly closer to 0 than actual value and provides ~12 bits 
accuracy - const auto acc_penalty = fsplat(0x1p-13f) * acc_result; - return acc_result - acc_penalty; + // Gives accuracy penalty, frest result is within one newton-raphson iteration for accuracy + const auto approx_result = fsplat(0.999875069f) / a; + return approx_result; }); register_intrinsic("spu_rsqrte", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); - const auto acc_result = fsplat(1.0) / fsqrt(fabs(a)); - // Determines accuracy penalty, frsqest result is always slightly closer to 0 than actual value and provides ~12 bits accuracy - const auto acc_penalty = fsplat(0x1p-13f) * acc_result; - return acc_result - acc_penalty; + // Gives accuracy penalty, frsqest result is within one newton-raphson iteration for accuracy + const auto approx_result = fsplat(0.999763668f) / fsqrt(fabs(a)); + return approx_result; }); } else