Accurate frest and frsqest

This commit is contained in:
RipleyTom 2024-01-20 00:05:31 +01:00 committed by Elad.Ash
parent 93f5154429
commit 606a60e901
5 changed files with 223 additions and 59 deletions

View File

@ -2756,16 +2756,77 @@ void spu_recompiler::FSMB(spu_opcode_t op)
// FREST: emits code that assembles every 32-bit lane of the result from two table lookups
// (spu_frest_fraction_lut and spu_frest_exponent_lut) plus the sign bit of the source lane.
// The source register is only read; the combined value is stored to op.rt.
void spu_recompiler::FREST(spu_opcode_t op)
{
	const XmmLink& va = XmmGet(op.ra, XmmType::Float);
	const XmmLink& v_fraction = XmmAlloc();
	const XmmLink& v_exponent = XmmAlloc();
	const XmmLink& v_sign = XmmAlloc();

	// Split the untouched source into three working copies
	c->movdqa(v_fraction, va);
	c->movdqa(v_exponent, va);
	c->movdqa(v_sign, va);

	// Fraction index: bits 18..22, exponent index: bits 23..30, sign: bit 31
	c->psrld(v_fraction, 18);
	c->psrld(v_exponent, 23);
	c->andps(v_fraction, XmmConst(v128::from32p(0x1F)));
	c->andps(v_exponent, XmmConst(v128::from32p(0xFF)));
	c->andps(v_sign, XmmConst(v128::from32p(0x80000000)));

	// The tables are addressed by their absolute host address
	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frest_fraction_lut);
	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frest_exponent_lut);

	// Replace each lane's index with the table entry it selects (index scaled by 4 bytes)
	for (u32 index = 0; index < 4; index++)
	{
		c->pextrd(*qw0, v_fraction, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
		c->pinsrd(v_fraction, *qw1, index);

		c->pextrd(*qw0, v_exponent, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
		c->pinsrd(v_exponent, *qw1, index);
	}

	// AVX2(not working?)
	// c->mov(qw1->r64(),spu_frest_fraction_lut);
	// c->vpgatherdd(v_fraction, asmjit::x86::dword_ptr(*qw1));
	// c->mov(qw0->r64(),spu_frest_exponent_lut);
	// c->vpgatherdd(v_exponent, asmjit::x86::dword_ptr(*qw0));

	// result = sign | fraction entry | exponent entry
	c->orps(v_fraction, v_exponent);
	c->orps(v_sign, v_fraction);
	c->movaps(SPU_OFF_128(gpr, op.rt), v_sign);
}
// FRSQEST: emits code that assembles every 32-bit lane of the result from two table lookups
// (spu_frsqest_fraction_lut and spu_frsqest_exponent_lut). Neither index includes bit 31,
// so the sign of the source does not influence the result.
void spu_recompiler::FRSQEST(spu_opcode_t op)
{
	const XmmLink& va = XmmGet(op.ra, XmmType::Float);
	const XmmLink& v_fraction = XmmAlloc();
	const XmmLink& v_exponent = XmmAlloc();

	// Split the untouched source into two working copies
	c->movdqa(v_fraction, va);
	c->movdqa(v_exponent, va);

	// Fraction index: bits 18..23 (includes the lowest exponent bit), exponent index: bits 23..30
	c->psrld(v_fraction, 18);
	c->psrld(v_exponent, 23);
	c->andps(v_fraction, XmmConst(v128::from32p(0x3F)));
	c->andps(v_exponent, XmmConst(v128::from32p(0xFF)));

	// The tables are addressed by their absolute host address
	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frsqest_fraction_lut);
	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frsqest_exponent_lut);

	// Replace each lane's index with the table entry it selects (index scaled by 4 bytes)
	for (u32 index = 0; index < 4; index++)
	{
		c->pextrd(*qw0, v_fraction, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
		c->pinsrd(v_fraction, *qw1, index);

		c->pextrd(*qw0, v_exponent, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
		c->pinsrd(v_exponent, *qw1, index);
	}

	// result = fraction entry | exponent entry
	c->orps(v_fraction, v_exponent);
	c->movaps(SPU_OFF_128(gpr, op.rt), v_fraction);
}
void spu_recompiler::LQX(spu_opcode_t op)

View File

@ -689,15 +689,48 @@ bool FSMB(spu_thread& spu, spu_opcode_t op)
// Interpreter FREST: every 32-bit lane of gpr[rt] becomes
//   spu_frest_fraction_lut[(a >> 18) & 0x1F] | spu_frest_exponent_lut[(a >> 23) & 0xFF] | (a & 0x80000000)
// where a is the matching lane of gpr[ra]. The destination is written only after all lanes
// were computed, so rt == ra is handled correctly. Always returns true.
template <spu_exec_bit... Flags>
bool FREST(spu_thread& spu, spu_opcode_t op)
{
	v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x1F));
	v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF));
	v128 sign = spu.gpr[op.ra] & _mm_set1_epi32(0x80000000);

	// AVX2
	// v128 fraction = _mm_i32gather_epi32(spu_frest_fraction_lut, fraction_index, 4);
	// v128 exponent = _mm_i32gather_epi32(spu_frest_exponent_lut, exponent_index, 4);

	v128 result;

	for (u32 index = 0; index < 4; index++)
	{
		u32 r = spu_frest_fraction_lut[fraction_index._u32[index]];
		r |= spu_frest_exponent_lut[exponent_index._u32[index]];
		r |= sign._u32[index];
		result._u32[index] = r;
	}

	spu.gpr[op.rt] = result;
	return true;
}
// Interpreter FRSQEST: every 32-bit lane of gpr[rt] becomes
//   spu_frsqest_fraction_lut[(a >> 18) & 0x3F] | spu_frsqest_exponent_lut[(a >> 23) & 0xFF]
// where a is the matching lane of gpr[ra]. Bit 31 is part of neither index, so the sign of
// the input is ignored. The destination is written only after all lanes were computed,
// so rt == ra is handled correctly. Always returns true.
template <spu_exec_bit... Flags>
bool FRSQEST(spu_thread& spu, spu_opcode_t op)
{
	v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x3F));
	v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF));

	// AVX2
	// v128 fraction = _mm_i32gather_epi32(spu_frsqest_fraction_lut, fraction_index, 4);
	// v128 exponent = _mm_i32gather_epi32(spu_frsqest_exponent_lut, exponent_index, 4);

	v128 result;

	for (u32 index = 0; index < 4; index++)
	{
		u32 r = spu_frsqest_fraction_lut[fraction_index._u32[index]];
		r |= spu_frsqest_exponent_lut[exponent_index._u32[index]];
		result._u32[index] = r;
	}

	spu.gpr[op.rt] = result;
	return true;
}

View File

@ -107,6 +107,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Global variable (function table)
llvm::GlobalVariable* m_function_table{};
// Global LUTs
// Constant u32 arrays created in the current module by init_luts();
// the FREST/FRSQEST intrinsics read them through load_const.
llvm::GlobalVariable* m_spu_frest_fraction_lut{};
llvm::GlobalVariable* m_spu_frest_exponent_lut{};
llvm::GlobalVariable* m_spu_frsqest_fraction_lut{};
llvm::GlobalVariable* m_spu_frsqest_exponent_lut{};
// Helpers (interpreter)
llvm::GlobalVariable* m_scale_float_to{};
llvm::GlobalVariable* m_scale_to_float{};
@ -1091,6 +1097,15 @@ public:
}
}
void init_luts()
{
// LUTs for some instructions
m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut));
m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut));
m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut));
m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut));
}
virtual spu_function_t compile(spu_program&& _func) override
{
if (_func.data.empty() && m_interp_magn)
@ -1179,6 +1194,8 @@ public:
main_func->setCallingConv(CallingConv::GHC);
set_function(main_func);
init_luts();
// Start compilation
const auto label_test = BasicBlock::Create(m_context, "", m_function);
const auto label_diff = BasicBlock::Create(m_context, "", m_function);
@ -2158,6 +2175,8 @@ public:
const auto if_type = get_ftype<void, u8*, u8*, u32, u32, u8*, u32, u8*>();
m_function_table = new GlobalVariable(*m_module, ArrayType::get(if_type->getPointerTo(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr);
init_luts();
// Add return function
const auto ret_func = cast<Function>(_module->getOrInsertFunction("spu_ret", if_type).getCallee());
ret_func->setCallingConv(CallingConv::GHC);
@ -5297,36 +5316,29 @@ public:
// FREST: registers the "spu_frest" intrinsic and routes op.ra through it into op.rt.
// The intrinsic builds every lane as
//   frest_fraction_lut[(a >> 18) & 0x1F] | frest_exponent_lut[(a >> 23) & 0xFF] | (a & 0x80000000)
// using the module-level tables created by init_luts().
void FREST(spu_opcode_t op)
{
	// TODO
	register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
	{
		// Operate on the raw bit pattern of the float operand
		const auto a = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(0)));

		const auto a_fraction = (a >> splat<u32[4]>(18)) & splat<u32[4]>(0x1F);
		const auto a_exponent = (a >> splat<u32[4]>(23)) & splat<u32[4]>(0xFF);
		const auto a_sign = (a & splat<u32[4]>(0x80000000));
		value_t<u32[4]> final_result = eval(splat<u32[4]>(0));

		// Look up each lane separately and insert the combined word into the result vector
		for (u32 i = 0; i < 4; i++)
		{
			const auto eval_fraction = eval(extract(a_fraction, i));
			const auto eval_exponent = eval(extract(a_exponent, i));
			const auto eval_sign = eval(extract(a_sign, i));

			value_t<u32> r_fraction = load_const<u32>(m_spu_frest_fraction_lut, eval_fraction);
			value_t<u32> r_exponent = load_const<u32>(m_spu_frest_exponent_lut, eval_exponent);

			final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent)));
		}

		return bitcast<f32[4]>(final_result);
	});

	set_vr(op.rt, frest(get_vr<f32[4]>(op.ra)));
}
@ -5339,33 +5351,27 @@ public:
// FRSQEST: registers the "spu_frsqest" intrinsic and routes op.ra through it into op.rt.
// The intrinsic builds every lane as
//   frsqest_fraction_lut[(a >> 18) & 0x3F] | frsqest_exponent_lut[(a >> 23) & 0xFF]
// using the module-level tables created by init_luts(). Bit 31 is part of neither index,
// so the sign of the operand is ignored.
void FRSQEST(spu_opcode_t op)
{
	// TODO
	register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
	{
		// Operate on the raw bit pattern of the float operand
		const auto a = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(0)));

		const auto a_fraction = (a >> splat<u32[4]>(18)) & splat<u32[4]>(0x3F);
		const auto a_exponent = (a >> splat<u32[4]>(23)) & splat<u32[4]>(0xFF);
		value_t<u32[4]> final_result = eval(splat<u32[4]>(0));

		// Look up each lane separately and insert the combined word into the result vector
		for (u32 i = 0; i < 4; i++)
		{
			const auto eval_fraction = eval(extract(a_fraction, i));
			const auto eval_exponent = eval(extract(a_exponent, i));

			value_t<u32> r_fraction = load_const<u32>(m_spu_frsqest_fraction_lut, eval_fraction);
			value_t<u32> r_exponent = load_const<u32>(m_spu_frsqest_exponent_lut, eval_exponent);

			final_result = eval(insert(final_result, i, eval(r_fraction | r_exponent)));
		}

		return bitcast<f32[4]>(final_result);
	});

	set_vr(op.rt, frsqest(get_vr<f32[4]>(op.ra)));
}

View File

@ -46,6 +46,64 @@
#endif
#endif
// LUTs for SPU instructions
// FREST fraction table: indexed by (value >> 18) & 0x1F, i.e. bits 18..22 of the operand.
// Every entry fits in the low 23 bits; users OR it with an exponent table entry and the sign bit.
const u32 spu_frest_fraction_lut[32] =
{
0x7FFBE0, 0x7F87A6, 0x70EF72, 0x708B40, 0x638B12, 0x633AEA, 0x5792C4, 0x574AA0,
0x4CCA7E, 0x4C9262, 0x430A44, 0x42D62A, 0x3A2E12, 0x39FDFA, 0x3215E4, 0x31F1D2,
0x2AA9BE, 0x2A85AC, 0x23D59A, 0x23BD8E, 0x1D8576, 0x1D8576, 0x17AD5A, 0x17AD5A,
0x124543, 0x124543, 0x0D392D, 0x0D392D, 0x08851A, 0x08851A, 0x041D07, 0x041D07
};
// FREST exponent table: indexed by (value >> 23) & 0xFF, i.e. bits 23..30 of the operand.
// Entry 0 is 0x7F800000, entries 1..252 hold (253 - index) << 23, entries 253..255 are zero.
const u32 spu_frest_exponent_lut[256] =
{
0x7F800000, 0x7E000000, 0x7D800000, 0x7D000000, 0x7C800000, 0x7C000000, 0x7B800000, 0x7B000000, 0x7A800000, 0x7A000000, 0x79800000, 0x79000000, 0x78800000, 0x78000000, 0x77800000, 0x77000000,
0x76800000, 0x76000000, 0x75800000, 0x75000000, 0x74800000, 0x74000000, 0x73800000, 0x73000000, 0x72800000, 0x72000000, 0x71800000, 0x71000000, 0x70800000, 0x70000000, 0x6F800000, 0x6F000000,
0x6E800000, 0x6E000000, 0x6D800000, 0x6D000000, 0x6C800000, 0x6C000000, 0x6B800000, 0x6B000000, 0x6A800000, 0x6A000000, 0x69800000, 0x69000000, 0x68800000, 0x68000000, 0x67800000, 0x67000000,
0x66800000, 0x66000000, 0x65800000, 0x65000000, 0x64800000, 0x64000000, 0x63800000, 0x63000000, 0x62800000, 0x62000000, 0x61800000, 0x61000000, 0x60800000, 0x60000000, 0x5F800000, 0x5F000000,
0x5E800000, 0x5E000000, 0x5D800000, 0x5D000000, 0x5C800000, 0x5C000000, 0x5B800000, 0x5B000000, 0x5A800000, 0x5A000000, 0x59800000, 0x59000000, 0x58800000, 0x58000000, 0x57800000, 0x57000000,
0x56800000, 0x56000000, 0x55800000, 0x55000000, 0x54800000, 0x54000000, 0x53800000, 0x53000000, 0x52800000, 0x52000000, 0x51800000, 0x51000000, 0x50800000, 0x50000000, 0x4F800000, 0x4F000000,
0x4E800000, 0x4E000000, 0x4D800000, 0x4D000000, 0x4C800000, 0x4C000000, 0x4B800000, 0x4B000000, 0x4A800000, 0x4A000000, 0x49800000, 0x49000000, 0x48800000, 0x48000000, 0x47800000, 0x47000000,
0x46800000, 0x46000000, 0x45800000, 0x45000000, 0x44800000, 0x44000000, 0x43800000, 0x43000000, 0x42800000, 0x42000000, 0x41800000, 0x41000000, 0x40800000, 0x40000000, 0x3F800000, 0x3F000000,
0x3E800000, 0x3E000000, 0x3D800000, 0x3D000000, 0x3C800000, 0x3C000000, 0x3B800000, 0x3B000000, 0x3A800000, 0x3A000000, 0x39800000, 0x39000000, 0x38800000, 0x38000000, 0x37800000, 0x37000000,
0x36800000, 0x36000000, 0x35800000, 0x35000000, 0x34800000, 0x34000000, 0x33800000, 0x33000000, 0x32800000, 0x32000000, 0x31800000, 0x31000000, 0x30800000, 0x30000000, 0x2F800000, 0x2F000000,
0x2E800000, 0x2E000000, 0x2D800000, 0x2D000000, 0x2C800000, 0x2C000000, 0x2B800000, 0x2B000000, 0x2A800000, 0x2A000000, 0x29800000, 0x29000000, 0x28800000, 0x28000000, 0x27800000, 0x27000000,
0x26800000, 0x26000000, 0x25800000, 0x25000000, 0x24800000, 0x24000000, 0x23800000, 0x23000000, 0x22800000, 0x22000000, 0x21800000, 0x21000000, 0x20800000, 0x20000000, 0x1F800000, 0x1F000000,
0x1E800000, 0x1E000000, 0x1D800000, 0x1D000000, 0x1C800000, 0x1C000000, 0x1B800000, 0x1B000000, 0x1A800000, 0x1A000000, 0x19800000, 0x19000000, 0x18800000, 0x18000000, 0x17800000, 0x17000000,
0x16800000, 0x16000000, 0x15800000, 0x15000000, 0x14800000, 0x14000000, 0x13800000, 0x13000000, 0x12800000, 0x12000000, 0x11800000, 0x11000000, 0x10800000, 0x10000000, 0x0F800000, 0x0F000000,
0x0E800000, 0x0E000000, 0x0D800000, 0x0D000000, 0x0C800000, 0x0C000000, 0x0B800000, 0x0B000000, 0x0A800000, 0x0A000000, 0x09800000, 0x09000000, 0x08800000, 0x08000000, 0x07800000, 0x07000000,
0x06800000, 0x06000000, 0x05800000, 0x05000000, 0x04800000, 0x04000000, 0x03800000, 0x03000000, 0x02800000, 0x02000000, 0x01800000, 0x01000000, 0x00800000, 0x00000000, 0x00000000, 0x00000000
};
// FRSQEST fraction table: indexed by (value >> 18) & 0x3F, i.e. bits 18..23 of the operand
// (bit 23 is the lowest bit of the exponent field, so it selects the upper or lower half).
// Every entry fits in the low 23 bits; users OR it with an exponent table entry.
const u32 spu_frsqest_fraction_lut[64] =
{
0x350160, 0x34E954, 0x2F993D, 0x2F993D, 0x2AA523, 0x2AA523, 0x26190D, 0x26190D, 0x21E4F9, 0x21E4F9, 0x1E00E9, 0x1E00E9, 0x1A5CD9, 0x1A5CD9, 0x16F8CB, 0x16F8CB,
0x13CCC0, 0x13CCC0, 0x10CCB3, 0x10CCB3, 0x0E00AA, 0x0E00AA, 0x0B58A1, 0x0B58A1, 0x08D498, 0x08D498, 0x067491, 0x067491, 0x043089, 0x043089, 0x020C83, 0x020C83,
0x7FFDF4, 0x7FD1DE, 0x7859C8, 0x783DBA, 0x71559C, 0x71559C, 0x6AE57C, 0x6AE57C, 0x64F561, 0x64F561, 0x5F7149, 0x5F7149, 0x5A4D33, 0x5A4D33, 0x55811F, 0x55811F,
0x51050F, 0x51050F, 0x4CC8FE, 0x4CC8FE, 0x48D0F0, 0x48D0F0, 0x4510E4, 0x4510E4, 0x4180D7, 0x4180D7, 0x3E24CC, 0x3E24CC, 0x3AF4C3, 0x3AF4C3, 0x37E8BA, 0x37E8BA
};
// FRSQEST exponent table: indexed by (value >> 23) & 0xFF, i.e. bits 23..30 of the operand.
// Entry 0 is 0x7F800000; from index 1 on the value starts at 0x5E800000 and drops by
// 0x00800000 after every second index, ending with 0x1F000000 at index 255.
const u32 spu_frsqest_exponent_lut[256] =
{
0x7F800000, 0x5E800000, 0x5E800000, 0x5E000000, 0x5E000000, 0x5D800000, 0x5D800000, 0x5D000000, 0x5D000000, 0x5C800000, 0x5C800000, 0x5C000000, 0x5C000000, 0x5B800000, 0x5B800000, 0x5B000000,
0x5B000000, 0x5A800000, 0x5A800000, 0x5A000000, 0x5A000000, 0x59800000, 0x59800000, 0x59000000, 0x59000000, 0x58800000, 0x58800000, 0x58000000, 0x58000000, 0x57800000, 0x57800000, 0x57000000,
0x57000000, 0x56800000, 0x56800000, 0x56000000, 0x56000000, 0x55800000, 0x55800000, 0x55000000, 0x55000000, 0x54800000, 0x54800000, 0x54000000, 0x54000000, 0x53800000, 0x53800000, 0x53000000,
0x53000000, 0x52800000, 0x52800000, 0x52000000, 0x52000000, 0x51800000, 0x51800000, 0x51000000, 0x51000000, 0x50800000, 0x50800000, 0x50000000, 0x50000000, 0x4F800000, 0x4F800000, 0x4F000000,
0x4F000000, 0x4E800000, 0x4E800000, 0x4E000000, 0x4E000000, 0x4D800000, 0x4D800000, 0x4D000000, 0x4D000000, 0x4C800000, 0x4C800000, 0x4C000000, 0x4C000000, 0x4B800000, 0x4B800000, 0x4B000000,
0x4B000000, 0x4A800000, 0x4A800000, 0x4A000000, 0x4A000000, 0x49800000, 0x49800000, 0x49000000, 0x49000000, 0x48800000, 0x48800000, 0x48000000, 0x48000000, 0x47800000, 0x47800000, 0x47000000,
0x47000000, 0x46800000, 0x46800000, 0x46000000, 0x46000000, 0x45800000, 0x45800000, 0x45000000, 0x45000000, 0x44800000, 0x44800000, 0x44000000, 0x44000000, 0x43800000, 0x43800000, 0x43000000,
0x43000000, 0x42800000, 0x42800000, 0x42000000, 0x42000000, 0x41800000, 0x41800000, 0x41000000, 0x41000000, 0x40800000, 0x40800000, 0x40000000, 0x40000000, 0x3F800000, 0x3F800000, 0x3F000000,
0x3F000000, 0x3E800000, 0x3E800000, 0x3E000000, 0x3E000000, 0x3D800000, 0x3D800000, 0x3D000000, 0x3D000000, 0x3C800000, 0x3C800000, 0x3C000000, 0x3C000000, 0x3B800000, 0x3B800000, 0x3B000000,
0x3B000000, 0x3A800000, 0x3A800000, 0x3A000000, 0x3A000000, 0x39800000, 0x39800000, 0x39000000, 0x39000000, 0x38800000, 0x38800000, 0x38000000, 0x38000000, 0x37800000, 0x37800000, 0x37000000,
0x37000000, 0x36800000, 0x36800000, 0x36000000, 0x36000000, 0x35800000, 0x35800000, 0x35000000, 0x35000000, 0x34800000, 0x34800000, 0x34000000, 0x34000000, 0x33800000, 0x33800000, 0x33000000,
0x33000000, 0x32800000, 0x32800000, 0x32000000, 0x32000000, 0x31800000, 0x31800000, 0x31000000, 0x31000000, 0x30800000, 0x30800000, 0x30000000, 0x30000000, 0x2F800000, 0x2F800000, 0x2F000000,
0x2F000000, 0x2E800000, 0x2E800000, 0x2E000000, 0x2E000000, 0x2D800000, 0x2D800000, 0x2D000000, 0x2D000000, 0x2C800000, 0x2C800000, 0x2C000000, 0x2C000000, 0x2B800000, 0x2B800000, 0x2B000000,
0x2B000000, 0x2A800000, 0x2A800000, 0x2A000000, 0x2A000000, 0x29800000, 0x29800000, 0x29000000, 0x29000000, 0x28800000, 0x28800000, 0x28000000, 0x28000000, 0x27800000, 0x27800000, 0x27000000,
0x27000000, 0x26800000, 0x26800000, 0x26000000, 0x26000000, 0x25800000, 0x25800000, 0x25000000, 0x25000000, 0x24800000, 0x24800000, 0x24000000, 0x24000000, 0x23800000, 0x23800000, 0x23000000,
0x23000000, 0x22800000, 0x22800000, 0x22000000, 0x22000000, 0x21800000, 0x21800000, 0x21000000, 0x21000000, 0x20800000, 0x20800000, 0x20000000, 0x20000000, 0x1F800000, 0x1F800000, 0x1F000000
};
using spu_rdata_t = decltype(spu_thread::rdata);
template <>

View File

@ -24,6 +24,12 @@ namespace utils
class shm;
}
// LUTs for SPU
// Lookup tables used by the FREST and FRSQEST implementations.
// Fraction tables are indexed by (value >> 18) masked to the table size (0x1F / 0x3F),
// exponent tables by (value >> 23) & 0xFF; the selected entries are ORed together.
extern const u32 spu_frest_fraction_lut[32];
extern const u32 spu_frest_exponent_lut[256];
extern const u32 spu_frsqest_fraction_lut[64];
extern const u32 spu_frsqest_exponent_lut[256];
// JIT Block
using spu_function_t = void(*)(spu_thread&, void*, u8*);