diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index 1b64c00530..51b0d6fb54 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -2756,16 +2756,77 @@ void spu_recompiler::FSMB(spu_opcode_t op) void spu_recompiler::FREST(spu_opcode_t op) { const XmmLink& va = XmmGet(op.ra, XmmType::Float); - c->rcpps(va, va); - c->movaps(SPU_OFF_128(gpr, op.rt), va); + const XmmLink& v_fraction = XmmAlloc(); + const XmmLink& v_exponent = XmmAlloc(); + const XmmLink& v_sign = XmmAlloc(); + c->movdqa(v_fraction, va); + c->movdqa(v_exponent, va); + c->movdqa(v_sign, va); + + c->psrld(v_fraction, 18); + c->psrld(v_exponent, 23); + + c->andps(v_fraction, XmmConst(v128::from32p(0x1F))); + c->andps(v_exponent, XmmConst(v128::from32p(0xFF))); + c->andps(v_sign, XmmConst(v128::from32p(0x80000000))); + + const u64 fraction_lut_addr = reinterpret_cast(spu_frest_fraction_lut); + const u64 exponent_lut_addr = reinterpret_cast(spu_frest_exponent_lut); + + for (u32 index = 0; index < 4; index++) + { + c->pextrd(*qw0, v_fraction, index); + c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2)); + c->pinsrd(v_fraction, *qw1, index); + + c->pextrd(*qw0, v_exponent, index); + c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2)); + c->pinsrd(v_exponent, *qw1, index); + } + + // AVX2(not working?) + // c->mov(qw1->r64(),spu_frest_fraction_lut); + // c->vpgatherdd(v_fraction, asmjit::x86::dword_ptr(*qw1)); + // c->mov(qw0->r64(),spu_frest_exponent_lut); + // c->vpgatherdd(v_exponent, asmjit::x86::dword_ptr(*qw0)); + + c->orps(v_fraction, v_exponent); + c->orps(v_sign, v_fraction); + + c->movaps(SPU_OFF_128(gpr, op.rt), v_sign); } void spu_recompiler::FRSQEST(spu_opcode_t op) { const XmmLink& va = XmmGet(op.ra, XmmType::Float); - c->andps(va, XmmConst(v128::from32p(0x7fffffff))); // abs - c->rsqrtps(va, va); - c->movaps(SPU_OFF_128(gpr, op.rt), va); + const XmmLink& v_fraction = XmmAlloc(); + const XmmLink& v_exponent = XmmAlloc(); + c->movdqa(v_fraction, va); + c->movdqa(v_exponent, va); + + c->psrld(v_fraction, 18); + c->psrld(v_exponent, 23); + + c->andps(v_fraction, XmmConst(v128::from32p(0x3F))); + c->andps(v_exponent, XmmConst(v128::from32p(0xFF))); + + const u64 fraction_lut_addr = reinterpret_cast(spu_frsqest_fraction_lut); + const u64 exponent_lut_addr = reinterpret_cast(spu_frsqest_exponent_lut); + + for (u32 index = 0; index < 4; index++) + { + c->pextrd(*qw0, v_fraction, index); + c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2)); + c->pinsrd(v_fraction, *qw1, index); + + c->pextrd(*qw0, v_exponent, index); + c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2)); + c->pinsrd(v_exponent, *qw1, index); + } + + c->orps(v_fraction, v_exponent); + + c->movaps(SPU_OFF_128(gpr, op.rt), v_fraction); } void spu_recompiler::LQX(spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index b3e1f729a9..871c65b152 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -689,15 +689,48 @@ bool FSMB(spu_thread& spu, spu_opcode_t op) template bool FREST(spu_thread& spu, spu_opcode_t op) { - spu.gpr[op.rt] = _mm_rcp_ps(spu.gpr[op.ra]); + v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x1F)); + v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF)); + v128 sign = spu.gpr[op.ra] & _mm_set1_epi32(0x80000000); + + // AVX2 + // v128 fraction = _mm_i32gather_epi32(spu_frest_fraction_lut, fraction_index, 4); + // v128 exponent = _mm_i32gather_epi32(spu_frest_exponent_lut, exponent_index, 4); + + v128 result; + + for (u32 index = 0; index < 4; index++) + { + u32 r = spu_frest_fraction_lut[fraction_index._u32[index]]; + r |= spu_frest_exponent_lut[exponent_index._u32[index]]; + r |= sign._u32[index]; + result._u32[index] = r; + } + + spu.gpr[op.rt] = result; return true; } template bool FRSQEST(spu_thread& spu, spu_opcode_t op) { - const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - spu.gpr[op.rt] = _mm_rsqrt_ps(_mm_and_ps(spu.gpr[op.ra], mask)); + v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x3F)); + v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF)); + + // AVX2 + // v128 fraction = _mm_i32gather_epi32(spu_frsqest_fraction_lut, fraction_index, 4); + // v128 exponent = _mm_i32gather_epi32(spu_frsqest_exponent_lut, exponent_index, 4); + + v128 result; + + for (u32 index = 0; index < 4; index++) + { + u32 r = spu_frsqest_fraction_lut[fraction_index._u32[index]]; + r |= spu_frsqest_exponent_lut[exponent_index._u32[index]]; + result._u32[index] = r; + } + + spu.gpr[op.rt] = result; return true; } diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 279eeb53bf..ccbeaaeceb 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -107,6 +107,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Global variable (function table) llvm::GlobalVariable* m_function_table{}; + // Global LUTs + llvm::GlobalVariable* m_spu_frest_fraction_lut{}; + llvm::GlobalVariable* m_spu_frest_exponent_lut{}; + llvm::GlobalVariable* m_spu_frsqest_fraction_lut{}; + llvm::GlobalVariable* m_spu_frsqest_exponent_lut{}; + // Helpers (interpreter) llvm::GlobalVariable* m_scale_float_to{}; llvm::GlobalVariable* m_scale_to_float{}; @@ -1091,6 +1097,15 @@ public: } } + void init_luts() + { + // LUTs for some instructions + m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut)); + m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut)); + m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut)); + m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut)); + } + virtual spu_function_t compile(spu_program&& _func) override { if (_func.data.empty() && m_interp_magn) @@ -1179,6 +1194,8 @@ public: main_func->setCallingConv(CallingConv::GHC); set_function(main_func); + init_luts(); + // Start compilation const auto label_test = BasicBlock::Create(m_context, "", m_function); const auto label_diff = BasicBlock::Create(m_context, "", m_function); @@ -2158,6 +2175,8 @@ public: const auto if_type = get_ftype(); m_function_table = new GlobalVariable(*m_module, ArrayType::get(if_type->getPointerTo(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); + init_luts(); + // Add return function const auto ret_func = cast(_module->getOrInsertFunction("spu_ret", if_type).getCallee()); ret_func->setCallingConv(CallingConv::GHC); @@ -5297,36 +5316,29 @@ public: void FREST(spu_opcode_t op) { - // TODO - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) + register_intrinsic("spu_frest", [&](llvm::CallInst* ci) { - const auto a = get_vr(op.ra); - const auto mask_ov = sext(bitcast(fabs(a)) > splat(0x7e7fffff)); - const auto mask_de = eval(noncast(sext(fcmp_ord(a == fsplat(0.)))) >> 1); - set_vr(op.rt, (bitcast(fsplat(1.0) / a) & ~mask_ov) | noncast(mask_de)); - return; - } + const auto a = bitcast(value(ci->getOperand(0))); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) - { - register_intrinsic("spu_frest", [&](llvm::CallInst* ci) + const auto a_fraction = (a >> splat(18)) & splat(0x1F); + const auto a_exponent = (a >> splat(23)) & splat(0xFF); + const auto a_sign = (a & splat(0x80000000)); + value_t final_result = eval(splat(0)); + + for (u32 i = 0; i < 4; i++) { - const auto a = value(ci->getOperand(0)); - // Gives accuracy penalty, frest result is within one newton-raphson iteration for accuracy - const auto approx_result = fsplat(0.999875069f) / a; - // Zeroes the last 11 bytes of the mantissa so FI calculations end up correct if needed - return bitcast(bitcast(approx_result) & splat(0xFFFFF800)); - }); - } - else - { - register_intrinsic("spu_frest", [&](llvm::CallInst* ci) - { - const auto a = value(ci->getOperand(0)); - // Fast but this makes the result vary per cpu - return fre(a); - }); - } + const auto eval_fraction = eval(extract(a_fraction, i)); + const auto eval_exponent = eval(extract(a_exponent, i)); + const auto eval_sign = eval(extract(a_sign, i)); + + value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction); + value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent); + + final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent))); + } + + return bitcast(final_result); + }); set_vr(op.rt, frest(get_vr(op.ra))); } @@ -5339,33 +5351,27 @@ public: void FRSQEST(spu_opcode_t op) { - // TODO - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) + register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci) { - set_vr(op.rt, fsplat(1.0) / fsqrt(fabs(get_vr(op.ra)))); - return; - } + const auto a = bitcast(value(ci->getOperand(0))); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) - { - register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci) + const auto a_fraction = (a >> splat(18)) & splat(0x3F); + const auto a_exponent = (a >> splat(23)) & splat(0xFF); + value_t final_result = eval(splat(0)); + + for (u32 i = 0; i < 4; i++) { - const auto a = value(ci->getOperand(0)); - // Gives accuracy penalty, frsqest result is within one newton-raphson iteration for accuracy - const auto approx_result = fsplat(0.999763668f) / fsqrt(fabs(a)); - // Zeroes the last 11 bytes of the mantissa so FI calculations end up correct if needed - return bitcast(bitcast(approx_result) & splat(0xFFFFF800)); - }); - } - else - { - register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci) - { - const auto a = value(ci->getOperand(0)); - // Fast but this makes the result vary per cpu - return frsqe(fabs(a)); - }); - } + const auto eval_fraction = eval(extract(a_fraction, i)); + const auto eval_exponent = eval(extract(a_exponent, i)); + + value_t r_fraction = load_const(m_spu_frsqest_fraction_lut, eval_fraction); + value_t r_exponent = load_const(m_spu_frsqest_exponent_lut, eval_exponent); + + final_result = eval(insert(final_result, i, eval(r_fraction | r_exponent))); + } + + return bitcast(final_result); + }); set_vr(op.rt, frsqest(get_vr(op.ra))); } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 0b92069975..ae8206ad1d 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -46,6 +46,64 @@ #endif #endif +// LUTs for SPU instructions + +const u32 spu_frest_fraction_lut[32] = +{ + 0x7FFBE0, 0x7F87A6, 0x70EF72, 0x708B40, 0x638B12, 0x633AEA, 0x5792C4, 0x574AA0, + 0x4CCA7E, 0x4C9262, 0x430A44, 0x42D62A, 0x3A2E12, 0x39FDFA, 0x3215E4, 0x31F1D2, + 0x2AA9BE, 0x2A85AC, 0x23D59A, 0x23BD8E, 0x1D8576, 0x1D8576, 0x17AD5A, 0x17AD5A, + 0x124543, 0x124543, 0x0D392D, 0x0D392D, 0x08851A, 0x08851A, 0x041D07, 0x041D07 +}; + +const u32 spu_frest_exponent_lut[256] = +{ + 0x7F800000, 0x7E000000, 0x7D800000, 0x7D000000, 0x7C800000, 0x7C000000, 0x7B800000, 0x7B000000, 0x7A800000, 0x7A000000, 0x79800000, 0x79000000, 0x78800000, 0x78000000, 0x77800000, 0x77000000, + 0x76800000, 0x76000000, 0x75800000, 0x75000000, 0x74800000, 0x74000000, 0x73800000, 0x73000000, 0x72800000, 0x72000000, 0x71800000, 0x71000000, 0x70800000, 0x70000000, 0x6F800000, 0x6F000000, + 0x6E800000, 0x6E000000, 0x6D800000, 0x6D000000, 0x6C800000, 0x6C000000, 0x6B800000, 0x6B000000, 0x6A800000, 0x6A000000, 0x69800000, 0x69000000, 0x68800000, 0x68000000, 0x67800000, 0x67000000, + 0x66800000, 0x66000000, 0x65800000, 0x65000000, 0x64800000, 0x64000000, 0x63800000, 0x63000000, 0x62800000, 0x62000000, 0x61800000, 0x61000000, 0x60800000, 0x60000000, 0x5F800000, 0x5F000000, + 0x5E800000, 0x5E000000, 0x5D800000, 0x5D000000, 0x5C800000, 0x5C000000, 0x5B800000, 0x5B000000, 0x5A800000, 0x5A000000, 0x59800000, 0x59000000, 0x58800000, 0x58000000, 0x57800000, 0x57000000, + 0x56800000, 0x56000000, 0x55800000, 0x55000000, 0x54800000, 0x54000000, 0x53800000, 0x53000000, 0x52800000, 0x52000000, 0x51800000, 0x51000000, 0x50800000, 0x50000000, 0x4F800000, 0x4F000000, + 0x4E800000, 0x4E000000, 0x4D800000, 0x4D000000, 0x4C800000, 0x4C000000, 0x4B800000, 0x4B000000, 0x4A800000, 0x4A000000, 0x49800000, 0x49000000, 0x48800000, 0x48000000, 0x47800000, 0x47000000, + 0x46800000, 0x46000000, 0x45800000, 0x45000000, 0x44800000, 0x44000000, 0x43800000, 0x43000000, 0x42800000, 0x42000000, 0x41800000, 0x41000000, 0x40800000, 0x40000000, 0x3F800000, 0x3F000000, + 0x3E800000, 0x3E000000, 0x3D800000, 0x3D000000, 0x3C800000, 0x3C000000, 0x3B800000, 0x3B000000, 0x3A800000, 0x3A000000, 0x39800000, 0x39000000, 0x38800000, 0x38000000, 0x37800000, 0x37000000, + 0x36800000, 0x36000000, 0x35800000, 0x35000000, 0x34800000, 0x34000000, 0x33800000, 0x33000000, 0x32800000, 0x32000000, 0x31800000, 0x31000000, 0x30800000, 0x30000000, 0x2F800000, 0x2F000000, + 0x2E800000, 0x2E000000, 0x2D800000, 0x2D000000, 0x2C800000, 0x2C000000, 0x2B800000, 0x2B000000, 0x2A800000, 0x2A000000, 0x29800000, 0x29000000, 0x28800000, 0x28000000, 0x27800000, 0x27000000, + 0x26800000, 0x26000000, 0x25800000, 0x25000000, 0x24800000, 0x24000000, 0x23800000, 0x23000000, 0x22800000, 0x22000000, 0x21800000, 0x21000000, 0x20800000, 0x20000000, 0x1F800000, 0x1F000000, + 0x1E800000, 0x1E000000, 0x1D800000, 0x1D000000, 0x1C800000, 0x1C000000, 0x1B800000, 0x1B000000, 0x1A800000, 0x1A000000, 0x19800000, 0x19000000, 0x18800000, 0x18000000, 0x17800000, 0x17000000, + 0x16800000, 0x16000000, 0x15800000, 0x15000000, 0x14800000, 0x14000000, 0x13800000, 0x13000000, 0x12800000, 0x12000000, 0x11800000, 0x11000000, 0x10800000, 0x10000000, 0x0F800000, 0x0F000000, + 0x0E800000, 0x0E000000, 0x0D800000, 0x0D000000, 0x0C800000, 0x0C000000, 0x0B800000, 0x0B000000, 0x0A800000, 0x0A000000, 0x09800000, 0x09000000, 0x08800000, 0x08000000, 0x07800000, 0x07000000, + 0x06800000, 0x06000000, 0x05800000, 0x05000000, 0x04800000, 0x04000000, 0x03800000, 0x03000000, 0x02800000, 0x02000000, 0x01800000, 0x01000000, 0x00800000, 0x00000000, 0x00000000, 0x00000000 +}; + +const u32 spu_frsqest_fraction_lut[64] = +{ + 0x350160, 0x34E954, 0x2F993D, 0x2F993D, 0x2AA523, 0x2AA523, 0x26190D, 0x26190D, 0x21E4F9, 0x21E4F9, 0x1E00E9, 0x1E00E9, 0x1A5CD9, 0x1A5CD9, 0x16F8CB, 0x16F8CB, + 0x13CCC0, 0x13CCC0, 0x10CCB3, 0x10CCB3, 0x0E00AA, 0x0E00AA, 0x0B58A1, 0x0B58A1, 0x08D498, 0x08D498, 0x067491, 0x067491, 0x043089, 0x043089, 0x020C83, 0x020C83, + 0x7FFDF4, 0x7FD1DE, 0x7859C8, 0x783DBA, 0x71559C, 0x71559C, 0x6AE57C, 0x6AE57C, 0x64F561, 0x64F561, 0x5F7149, 0x5F7149, 0x5A4D33, 0x5A4D33, 0x55811F, 0x55811F, + 0x51050F, 0x51050F, 0x4CC8FE, 0x4CC8FE, 0x48D0F0, 0x48D0F0, 0x4510E4, 0x4510E4, 0x4180D7, 0x4180D7, 0x3E24CC, 0x3E24CC, 0x3AF4C3, 0x3AF4C3, 0x37E8BA, 0x37E8BA +}; + +const u32 spu_frsqest_exponent_lut[256] = +{ + 0x7F800000, 0x5E800000, 0x5E800000, 0x5E000000, 0x5E000000, 0x5D800000, 0x5D800000, 0x5D000000, 0x5D000000, 0x5C800000, 0x5C800000, 0x5C000000, 0x5C000000, 0x5B800000, 0x5B800000, 0x5B000000, + 0x5B000000, 0x5A800000, 0x5A800000, 0x5A000000, 0x5A000000, 0x59800000, 0x59800000, 0x59000000, 0x59000000, 0x58800000, 0x58800000, 0x58000000, 0x58000000, 0x57800000, 0x57800000, 0x57000000, + 0x57000000, 0x56800000, 0x56800000, 0x56000000, 0x56000000, 0x55800000, 0x55800000, 0x55000000, 0x55000000, 0x54800000, 0x54800000, 0x54000000, 0x54000000, 0x53800000, 0x53800000, 0x53000000, + 0x53000000, 0x52800000, 0x52800000, 0x52000000, 0x52000000, 0x51800000, 0x51800000, 0x51000000, 0x51000000, 0x50800000, 0x50800000, 0x50000000, 0x50000000, 0x4F800000, 0x4F800000, 0x4F000000, + 0x4F000000, 0x4E800000, 0x4E800000, 0x4E000000, 0x4E000000, 0x4D800000, 0x4D800000, 0x4D000000, 0x4D000000, 0x4C800000, 0x4C800000, 0x4C000000, 0x4C000000, 0x4B800000, 0x4B800000, 0x4B000000, + 0x4B000000, 0x4A800000, 0x4A800000, 0x4A000000, 0x4A000000, 0x49800000, 0x49800000, 0x49000000, 0x49000000, 0x48800000, 0x48800000, 0x48000000, 0x48000000, 0x47800000, 0x47800000, 0x47000000, + 0x47000000, 0x46800000, 0x46800000, 0x46000000, 0x46000000, 0x45800000, 0x45800000, 0x45000000, 0x45000000, 0x44800000, 0x44800000, 0x44000000, 0x44000000, 0x43800000, 0x43800000, 0x43000000, + 0x43000000, 0x42800000, 0x42800000, 0x42000000, 0x42000000, 0x41800000, 0x41800000, 0x41000000, 0x41000000, 0x40800000, 0x40800000, 0x40000000, 0x40000000, 0x3F800000, 0x3F800000, 0x3F000000, + 0x3F000000, 0x3E800000, 0x3E800000, 0x3E000000, 0x3E000000, 0x3D800000, 0x3D800000, 0x3D000000, 0x3D000000, 0x3C800000, 0x3C800000, 0x3C000000, 0x3C000000, 0x3B800000, 0x3B800000, 0x3B000000, + 0x3B000000, 0x3A800000, 0x3A800000, 0x3A000000, 0x3A000000, 0x39800000, 0x39800000, 0x39000000, 0x39000000, 0x38800000, 0x38800000, 0x38000000, 0x38000000, 0x37800000, 0x37800000, 0x37000000, + 0x37000000, 0x36800000, 0x36800000, 0x36000000, 0x36000000, 0x35800000, 0x35800000, 0x35000000, 0x35000000, 0x34800000, 0x34800000, 0x34000000, 0x34000000, 0x33800000, 0x33800000, 0x33000000, + 0x33000000, 0x32800000, 0x32800000, 0x32000000, 0x32000000, 0x31800000, 0x31800000, 0x31000000, 0x31000000, 0x30800000, 0x30800000, 0x30000000, 0x30000000, 0x2F800000, 0x2F800000, 0x2F000000, + 0x2F000000, 0x2E800000, 0x2E800000, 0x2E000000, 0x2E000000, 0x2D800000, 0x2D800000, 0x2D000000, 0x2D000000, 0x2C800000, 0x2C800000, 0x2C000000, 0x2C000000, 0x2B800000, 0x2B800000, 0x2B000000, + 0x2B000000, 0x2A800000, 0x2A800000, 0x2A000000, 0x2A000000, 0x29800000, 0x29800000, 0x29000000, 0x29000000, 0x28800000, 0x28800000, 0x28000000, 0x28000000, 0x27800000, 0x27800000, 0x27000000, + 0x27000000, 0x26800000, 0x26800000, 0x26000000, 0x26000000, 0x25800000, 0x25800000, 0x25000000, 0x25000000, 0x24800000, 0x24800000, 0x24000000, 0x24000000, 0x23800000, 0x23800000, 0x23000000, + 0x23000000, 0x22800000, 0x22800000, 0x22000000, 0x22000000, 0x21800000, 0x21800000, 0x21000000, 0x21000000, 0x20800000, 0x20800000, 0x20000000, 0x20000000, 0x1F800000, 0x1F800000, 0x1F000000 +}; + using spu_rdata_t = decltype(spu_thread::rdata); template <> diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index f6ae828e07..0eb8cae5d9 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -24,6 +24,12 @@ namespace utils class shm; } +// LUTs for SPU +extern const u32 spu_frest_fraction_lut[32]; +extern const u32 spu_frest_exponent_lut[256]; +extern const u32 spu_frsqest_fraction_lut[64]; +extern const u32 spu_frsqest_exponent_lut[256]; + // JIT Block using spu_function_t = void(*)(spu_thread&, void*, u8*);