mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-12 22:14:58 +00:00
Accurate frest and frsqest
This commit is contained in:
parent
93f5154429
commit
606a60e901
@ -2756,16 +2756,77 @@ void spu_recompiler::FSMB(spu_opcode_t op)
|
||||
// FREST (floating reciprocal estimate), ASMJIT recompiler.
//
// Every 32-bit lane of the result is assembled from two lookup tables that are
// indexed by bit fields of the *source* operand, plus the source sign bit:
//   rt = spu_frest_fraction_lut[(ra >> 18) & 0x1F]
//      | spu_frest_exponent_lut[(ra >> 23) & 0xFF]
//      | (ra & 0x80000000)
// This is the same formula the interpreter and LLVM implementations use.
void spu_recompiler::FREST(spu_opcode_t op)
{
	// va has to stay untouched until all three bit fields have been copied out of it:
	// running a host reciprocal on it first would index the tables with the wrong bits.
	const XmmLink& va = XmmGet(op.ra, XmmType::Float);
	const XmmLink& v_fraction = XmmAlloc();
	const XmmLink& v_exponent = XmmAlloc();
	const XmmLink& v_sign = XmmAlloc();
	c->movdqa(v_fraction, va);
	c->movdqa(v_exponent, va);
	c->movdqa(v_sign, va);

	// Move each field down to bit 0 so it can be used as a table index
	c->psrld(v_fraction, 18);
	c->psrld(v_exponent, 23);

	c->andps(v_fraction, XmmConst(v128::from32p(0x1F)));
	c->andps(v_exponent, XmmConst(v128::from32p(0xFF)));
	c->andps(v_sign, XmmConst(v128::from32p(0x80000000)));

	// NOTE(review): the tables are addressed as absolute base + index * 4 (shift of 2);
	// confirm the host addresses are always encodable in this operand form.
	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frest_fraction_lut);
	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frest_exponent_lut);

	// Scalar gather: replace each lane's index with the table entry it selects
	for (u32 index = 0; index < 4; index++)
	{
		c->pextrd(*qw0, v_fraction, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
		c->pinsrd(v_fraction, *qw1, index);

		c->pextrd(*qw0, v_exponent, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
		c->pinsrd(v_exponent, *qw1, index);
	}

	// A vpgatherdd (AVX2) variant of the loop above was attempted by the original
	// author and left disabled with the remark "not working?".

	// Combine mantissa bits, exponent bits and sign into the final value
	c->orps(v_fraction, v_exponent);
	c->orps(v_sign, v_fraction);

	c->movaps(SPU_OFF_128(gpr, op.rt), v_sign);
}
|
||||
|
||||
// FRSQEST (floating reciprocal square root estimate), ASMJIT recompiler.
//
// Every 32-bit lane of the result is assembled from two lookup tables that are
// indexed by bit fields of the *source* operand:
//   rt = spu_frsqest_fraction_lut[(ra >> 18) & 0x3F]
//      | spu_frsqest_exponent_lut[(ra >> 23) & 0xFF]
// Neither mask includes bit 31, so the sign of the operand has no influence on
// the result and no separate abs step is needed.
void spu_recompiler::FRSQEST(spu_opcode_t op)
{
	// va has to stay untouched until both bit fields have been copied out of it:
	// running a host rsqrt on it first would index the tables with the wrong bits.
	const XmmLink& va = XmmGet(op.ra, XmmType::Float);
	const XmmLink& v_fraction = XmmAlloc();
	const XmmLink& v_exponent = XmmAlloc();
	c->movdqa(v_fraction, va);
	c->movdqa(v_exponent, va);

	// Move each field down to bit 0 so it can be used as a table index
	c->psrld(v_fraction, 18);
	c->psrld(v_exponent, 23);

	c->andps(v_fraction, XmmConst(v128::from32p(0x3F)));
	c->andps(v_exponent, XmmConst(v128::from32p(0xFF)));

	// NOTE(review): the tables are addressed as absolute base + index * 4 (shift of 2);
	// confirm the host addresses are always encodable in this operand form.
	const u64 fraction_lut_addr = reinterpret_cast<u64>(spu_frsqest_fraction_lut);
	const u64 exponent_lut_addr = reinterpret_cast<u64>(spu_frsqest_exponent_lut);

	// Scalar gather: replace each lane's index with the table entry it selects
	for (u32 index = 0; index < 4; index++)
	{
		c->pextrd(*qw0, v_fraction, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
		c->pinsrd(v_fraction, *qw1, index);

		c->pextrd(*qw0, v_exponent, index);
		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
		c->pinsrd(v_exponent, *qw1, index);
	}

	// Combine mantissa bits and exponent bits into the final value
	c->orps(v_fraction, v_exponent);

	c->movaps(SPU_OFF_128(gpr, op.rt), v_fraction);
}
|
||||
|
||||
void spu_recompiler::LQX(spu_opcode_t op)
|
||||
|
@ -689,15 +689,48 @@ bool FSMB(spu_thread& spu, spu_opcode_t op)
|
||||
template <spu_exec_bit... Flags>
|
||||
bool FREST(spu_thread& spu, spu_opcode_t op)
|
||||
{
|
||||
spu.gpr[op.rt] = _mm_rcp_ps(spu.gpr[op.ra]);
|
||||
v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x1F));
|
||||
v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF));
|
||||
v128 sign = spu.gpr[op.ra] & _mm_set1_epi32(0x80000000);
|
||||
|
||||
// AVX2
|
||||
// v128 fraction = _mm_i32gather_epi32(spu_frest_fraction_lut, fraction_index, 4);
|
||||
// v128 exponent = _mm_i32gather_epi32(spu_frest_exponent_lut, exponent_index, 4);
|
||||
|
||||
v128 result;
|
||||
|
||||
for (u32 index = 0; index < 4; index++)
|
||||
{
|
||||
u32 r = spu_frest_fraction_lut[fraction_index._u32[index]];
|
||||
r |= spu_frest_exponent_lut[exponent_index._u32[index]];
|
||||
r |= sign._u32[index];
|
||||
result._u32[index] = r;
|
||||
}
|
||||
|
||||
spu.gpr[op.rt] = result;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <spu_exec_bit... Flags>
|
||||
bool FRSQEST(spu_thread& spu, spu_opcode_t op)
|
||||
{
|
||||
const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
|
||||
spu.gpr[op.rt] = _mm_rsqrt_ps(_mm_and_ps(spu.gpr[op.ra], mask));
|
||||
v128 fraction_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 18)) & v128(_mm_set1_epi32(0x3F));
|
||||
v128 exponent_index = v128(_mm_srli_epi32(spu.gpr[op.ra], 23)) & v128(_mm_set1_epi32(0xFF));
|
||||
|
||||
// AVX2
|
||||
// v128 fraction = _mm_i32gather_epi32(spu_frsqest_fraction_lut, fraction_index, 4);
|
||||
// v128 exponent = _mm_i32gather_epi32(spu_frsqest_exponent_lut, exponent_index, 4);
|
||||
|
||||
v128 result;
|
||||
|
||||
for (u32 index = 0; index < 4; index++)
|
||||
{
|
||||
u32 r = spu_frsqest_fraction_lut[fraction_index._u32[index]];
|
||||
r |= spu_frsqest_exponent_lut[exponent_index._u32[index]];
|
||||
result._u32[index] = r;
|
||||
}
|
||||
|
||||
spu.gpr[op.rt] = result;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -107,6 +107,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
// Global variable (function table)
|
||||
llvm::GlobalVariable* m_function_table{};
|
||||
|
||||
// Global LUTs: constant lookup tables for FREST/FRSQEST.
// All four pointers are assigned by init_luts() and read with load_const<u32>()
// inside the "spu_frest" / "spu_frsqest" intrinsic handlers.
|
||||
// FREST mantissa table (32 entries)
llvm::GlobalVariable* m_spu_frest_fraction_lut{};
|
||||
// FREST exponent table (256 entries)
llvm::GlobalVariable* m_spu_frest_exponent_lut{};
|
||||
// FRSQEST mantissa table (64 entries)
llvm::GlobalVariable* m_spu_frsqest_fraction_lut{};
|
||||
// FRSQEST exponent table (256 entries)
llvm::GlobalVariable* m_spu_frsqest_exponent_lut{};
|
||||
|
||||
// Helpers (interpreter)
|
||||
llvm::GlobalVariable* m_scale_float_to{};
|
||||
llvm::GlobalVariable* m_scale_to_float{};
|
||||
@ -1091,6 +1097,15 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void init_luts()
|
||||
{
|
||||
// LUTs for some instructions
|
||||
m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut));
|
||||
m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut));
|
||||
m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut));
|
||||
m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType<u32>(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut));
|
||||
}
|
||||
|
||||
virtual spu_function_t compile(spu_program&& _func) override
|
||||
{
|
||||
if (_func.data.empty() && m_interp_magn)
|
||||
@ -1179,6 +1194,8 @@ public:
|
||||
main_func->setCallingConv(CallingConv::GHC);
|
||||
set_function(main_func);
|
||||
|
||||
init_luts();
|
||||
|
||||
// Start compilation
|
||||
const auto label_test = BasicBlock::Create(m_context, "", m_function);
|
||||
const auto label_diff = BasicBlock::Create(m_context, "", m_function);
|
||||
@ -2158,6 +2175,8 @@ public:
|
||||
const auto if_type = get_ftype<void, u8*, u8*, u32, u32, u8*, u32, u8*>();
|
||||
m_function_table = new GlobalVariable(*m_module, ArrayType::get(if_type->getPointerTo(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr);
|
||||
|
||||
init_luts();
|
||||
|
||||
// Add return function
|
||||
const auto ret_func = cast<Function>(_module->getOrInsertFunction("spu_ret", if_type).getCallee());
|
||||
ret_func->setCallingConv(CallingConv::GHC);
|
||||
@ -5297,36 +5316,29 @@ public:
|
||||
|
||||
// FREST (floating reciprocal estimate), LLVM recompiler.
//
// Registers the handler for the "spu_frest" intrinsic and then sets rt from
// frest(ra). The handler computes, for each 32-bit lane of its operand:
//   result = frest_fraction_lut[(a >> 18) & 0x1F]
//          | frest_exponent_lut[(a >> 23) & 0xFF]
//          | (a & 0x80000000)
// which matches the interpreter and ASMJIT implementations.
void FREST(spu_opcode_t op)
{
	register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
	{
		// Work on the raw bit pattern of the float operand
		const auto a = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(0)));

		const auto a_fraction = (a >> splat<u32[4]>(18)) & splat<u32[4]>(0x1F);
		const auto a_exponent = (a >> splat<u32[4]>(23)) & splat<u32[4]>(0xFF);
		const auto a_sign = (a & splat<u32[4]>(0x80000000));
		value_t<u32[4]> final_result = eval(splat<u32[4]>(0));

		// Look each lane up separately and insert it into the result vector
		for (u32 i = 0; i < 4; i++)
		{
			const auto eval_fraction = eval(extract(a_fraction, i));
			const auto eval_exponent = eval(extract(a_exponent, i));
			const auto eval_sign = eval(extract(a_sign, i));

			value_t<u32> r_fraction = load_const<u32>(m_spu_frest_fraction_lut, eval_fraction);
			value_t<u32> r_exponent = load_const<u32>(m_spu_frest_exponent_lut, eval_exponent);

			final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent)));
		}

		return bitcast<f32[4]>(final_result);
	});

	// NOTE(review): frest(...) presumably emits the call that the handler above expands - confirm at its definition
	set_vr(op.rt, frest(get_vr<f32[4]>(op.ra)));
}
|
||||
@ -5339,33 +5351,27 @@ public:
|
||||
|
||||
// FRSQEST (floating reciprocal square root estimate), LLVM recompiler.
//
// Registers the handler for the "spu_frsqest" intrinsic and then sets rt from
// frsqest(ra). The handler computes, for each 32-bit lane of its operand:
//   result = frsqest_fraction_lut[(a >> 18) & 0x3F]
//          | frsqest_exponent_lut[(a >> 23) & 0xFF]
// which matches the interpreter and ASMJIT implementations. Bit 31 of the
// operand is covered by neither mask, so the sign is ignored.
void FRSQEST(spu_opcode_t op)
{
	register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
	{
		// Work on the raw bit pattern of the float operand
		const auto a = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(0)));

		const auto a_fraction = (a >> splat<u32[4]>(18)) & splat<u32[4]>(0x3F);
		const auto a_exponent = (a >> splat<u32[4]>(23)) & splat<u32[4]>(0xFF);
		value_t<u32[4]> final_result = eval(splat<u32[4]>(0));

		// Look each lane up separately and insert it into the result vector
		for (u32 i = 0; i < 4; i++)
		{
			const auto eval_fraction = eval(extract(a_fraction, i));
			const auto eval_exponent = eval(extract(a_exponent, i));

			value_t<u32> r_fraction = load_const<u32>(m_spu_frsqest_fraction_lut, eval_fraction);
			value_t<u32> r_exponent = load_const<u32>(m_spu_frsqest_exponent_lut, eval_exponent);

			final_result = eval(insert(final_result, i, eval(r_fraction | r_exponent)));
		}

		return bitcast<f32[4]>(final_result);
	});

	// NOTE(review): frsqest(...) presumably emits the call that the handler above expands - confirm at its definition
	set_vr(op.rt, frsqest(get_vr<f32[4]>(op.ra)));
}
|
||||
|
@ -46,6 +46,64 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// LUTs for SPU instructions
|
||||
|
||||
// FREST mantissa table.
// Selected with (operand >> 18) & 0x1F; every entry fits in the low 23 bits
// and is ORed with an exponent-table entry and the sign bit by the users.
const u32 spu_frest_fraction_lut[32] =
{
	0x7FFBE0, 0x7F87A6, 0x70EF72, 0x708B40, 0x638B12, 0x633AEA, 0x5792C4, 0x574AA0, // 0x00-0x07
	0x4CCA7E, 0x4C9262, 0x430A44, 0x42D62A, 0x3A2E12, 0x39FDFA, 0x3215E4, 0x31F1D2, // 0x08-0x0F
	0x2AA9BE, 0x2A85AC, 0x23D59A, 0x23BD8E, 0x1D8576, 0x1D8576, 0x17AD5A, 0x17AD5A, // 0x10-0x17
	0x124543, 0x124543, 0x0D392D, 0x0D392D, 0x08851A, 0x08851A, 0x041D07, 0x041D07  // 0x18-0x1F
};
|
||||
|
||||
// FREST exponent table.
// Selected with (operand >> 23) & 0xFF. Entry 0 is 0x7F800000, entries 1..252
// step down by 0x00800000 from 0x7E000000 to 0x00800000, entries 253..255 are 0.
const u32 spu_frest_exponent_lut[256] =
{
	0x7F800000, 0x7E000000, 0x7D800000, 0x7D000000, 0x7C800000, 0x7C000000, 0x7B800000, 0x7B000000, 0x7A800000, 0x7A000000, 0x79800000, 0x79000000, 0x78800000, 0x78000000, 0x77800000, 0x77000000, // 0x00-0x0F
	0x76800000, 0x76000000, 0x75800000, 0x75000000, 0x74800000, 0x74000000, 0x73800000, 0x73000000, 0x72800000, 0x72000000, 0x71800000, 0x71000000, 0x70800000, 0x70000000, 0x6F800000, 0x6F000000, // 0x10-0x1F
	0x6E800000, 0x6E000000, 0x6D800000, 0x6D000000, 0x6C800000, 0x6C000000, 0x6B800000, 0x6B000000, 0x6A800000, 0x6A000000, 0x69800000, 0x69000000, 0x68800000, 0x68000000, 0x67800000, 0x67000000, // 0x20-0x2F
	0x66800000, 0x66000000, 0x65800000, 0x65000000, 0x64800000, 0x64000000, 0x63800000, 0x63000000, 0x62800000, 0x62000000, 0x61800000, 0x61000000, 0x60800000, 0x60000000, 0x5F800000, 0x5F000000, // 0x30-0x3F
	0x5E800000, 0x5E000000, 0x5D800000, 0x5D000000, 0x5C800000, 0x5C000000, 0x5B800000, 0x5B000000, 0x5A800000, 0x5A000000, 0x59800000, 0x59000000, 0x58800000, 0x58000000, 0x57800000, 0x57000000, // 0x40-0x4F
	0x56800000, 0x56000000, 0x55800000, 0x55000000, 0x54800000, 0x54000000, 0x53800000, 0x53000000, 0x52800000, 0x52000000, 0x51800000, 0x51000000, 0x50800000, 0x50000000, 0x4F800000, 0x4F000000, // 0x50-0x5F
	0x4E800000, 0x4E000000, 0x4D800000, 0x4D000000, 0x4C800000, 0x4C000000, 0x4B800000, 0x4B000000, 0x4A800000, 0x4A000000, 0x49800000, 0x49000000, 0x48800000, 0x48000000, 0x47800000, 0x47000000, // 0x60-0x6F
	0x46800000, 0x46000000, 0x45800000, 0x45000000, 0x44800000, 0x44000000, 0x43800000, 0x43000000, 0x42800000, 0x42000000, 0x41800000, 0x41000000, 0x40800000, 0x40000000, 0x3F800000, 0x3F000000, // 0x70-0x7F
	0x3E800000, 0x3E000000, 0x3D800000, 0x3D000000, 0x3C800000, 0x3C000000, 0x3B800000, 0x3B000000, 0x3A800000, 0x3A000000, 0x39800000, 0x39000000, 0x38800000, 0x38000000, 0x37800000, 0x37000000, // 0x80-0x8F
	0x36800000, 0x36000000, 0x35800000, 0x35000000, 0x34800000, 0x34000000, 0x33800000, 0x33000000, 0x32800000, 0x32000000, 0x31800000, 0x31000000, 0x30800000, 0x30000000, 0x2F800000, 0x2F000000, // 0x90-0x9F
	0x2E800000, 0x2E000000, 0x2D800000, 0x2D000000, 0x2C800000, 0x2C000000, 0x2B800000, 0x2B000000, 0x2A800000, 0x2A000000, 0x29800000, 0x29000000, 0x28800000, 0x28000000, 0x27800000, 0x27000000, // 0xA0-0xAF
	0x26800000, 0x26000000, 0x25800000, 0x25000000, 0x24800000, 0x24000000, 0x23800000, 0x23000000, 0x22800000, 0x22000000, 0x21800000, 0x21000000, 0x20800000, 0x20000000, 0x1F800000, 0x1F000000, // 0xB0-0xBF
	0x1E800000, 0x1E000000, 0x1D800000, 0x1D000000, 0x1C800000, 0x1C000000, 0x1B800000, 0x1B000000, 0x1A800000, 0x1A000000, 0x19800000, 0x19000000, 0x18800000, 0x18000000, 0x17800000, 0x17000000, // 0xC0-0xCF
	0x16800000, 0x16000000, 0x15800000, 0x15000000, 0x14800000, 0x14000000, 0x13800000, 0x13000000, 0x12800000, 0x12000000, 0x11800000, 0x11000000, 0x10800000, 0x10000000, 0x0F800000, 0x0F000000, // 0xD0-0xDF
	0x0E800000, 0x0E000000, 0x0D800000, 0x0D000000, 0x0C800000, 0x0C000000, 0x0B800000, 0x0B000000, 0x0A800000, 0x0A000000, 0x09800000, 0x09000000, 0x08800000, 0x08000000, 0x07800000, 0x07000000, // 0xE0-0xEF
	0x06800000, 0x06000000, 0x05800000, 0x05000000, 0x04800000, 0x04000000, 0x03800000, 0x03000000, 0x02800000, 0x02000000, 0x01800000, 0x01000000, 0x00800000, 0x00000000, 0x00000000, 0x00000000  // 0xF0-0xFF
};
|
||||
|
||||
// FRSQEST mantissa table.
// Selected with (operand >> 18) & 0x3F; every entry fits in the low 23 bits
// and is ORed with an exponent-table entry by the users.
const u32 spu_frsqest_fraction_lut[64] =
{
	0x350160, 0x34E954, 0x2F993D, 0x2F993D, 0x2AA523, 0x2AA523, 0x26190D, 0x26190D, 0x21E4F9, 0x21E4F9, 0x1E00E9, 0x1E00E9, 0x1A5CD9, 0x1A5CD9, 0x16F8CB, 0x16F8CB, // 0x00-0x0F
	0x13CCC0, 0x13CCC0, 0x10CCB3, 0x10CCB3, 0x0E00AA, 0x0E00AA, 0x0B58A1, 0x0B58A1, 0x08D498, 0x08D498, 0x067491, 0x067491, 0x043089, 0x043089, 0x020C83, 0x020C83, // 0x10-0x1F
	0x7FFDF4, 0x7FD1DE, 0x7859C8, 0x783DBA, 0x71559C, 0x71559C, 0x6AE57C, 0x6AE57C, 0x64F561, 0x64F561, 0x5F7149, 0x5F7149, 0x5A4D33, 0x5A4D33, 0x55811F, 0x55811F, // 0x20-0x2F
	0x51050F, 0x51050F, 0x4CC8FE, 0x4CC8FE, 0x48D0F0, 0x48D0F0, 0x4510E4, 0x4510E4, 0x4180D7, 0x4180D7, 0x3E24CC, 0x3E24CC, 0x3AF4C3, 0x3AF4C3, 0x37E8BA, 0x37E8BA  // 0x30-0x3F
};
|
||||
|
||||
// FRSQEST exponent table.
// Selected with (operand >> 23) & 0xFF. Entry 0 is 0x7F800000; entries 1..255
// drop by 0x00800000 every two indices, from 0x5E800000 down to 0x1F000000.
const u32 spu_frsqest_exponent_lut[256] =
{
	0x7F800000, 0x5E800000, 0x5E800000, 0x5E000000, 0x5E000000, 0x5D800000, 0x5D800000, 0x5D000000, 0x5D000000, 0x5C800000, 0x5C800000, 0x5C000000, 0x5C000000, 0x5B800000, 0x5B800000, 0x5B000000, // 0x00-0x0F
	0x5B000000, 0x5A800000, 0x5A800000, 0x5A000000, 0x5A000000, 0x59800000, 0x59800000, 0x59000000, 0x59000000, 0x58800000, 0x58800000, 0x58000000, 0x58000000, 0x57800000, 0x57800000, 0x57000000, // 0x10-0x1F
	0x57000000, 0x56800000, 0x56800000, 0x56000000, 0x56000000, 0x55800000, 0x55800000, 0x55000000, 0x55000000, 0x54800000, 0x54800000, 0x54000000, 0x54000000, 0x53800000, 0x53800000, 0x53000000, // 0x20-0x2F
	0x53000000, 0x52800000, 0x52800000, 0x52000000, 0x52000000, 0x51800000, 0x51800000, 0x51000000, 0x51000000, 0x50800000, 0x50800000, 0x50000000, 0x50000000, 0x4F800000, 0x4F800000, 0x4F000000, // 0x30-0x3F
	0x4F000000, 0x4E800000, 0x4E800000, 0x4E000000, 0x4E000000, 0x4D800000, 0x4D800000, 0x4D000000, 0x4D000000, 0x4C800000, 0x4C800000, 0x4C000000, 0x4C000000, 0x4B800000, 0x4B800000, 0x4B000000, // 0x40-0x4F
	0x4B000000, 0x4A800000, 0x4A800000, 0x4A000000, 0x4A000000, 0x49800000, 0x49800000, 0x49000000, 0x49000000, 0x48800000, 0x48800000, 0x48000000, 0x48000000, 0x47800000, 0x47800000, 0x47000000, // 0x50-0x5F
	0x47000000, 0x46800000, 0x46800000, 0x46000000, 0x46000000, 0x45800000, 0x45800000, 0x45000000, 0x45000000, 0x44800000, 0x44800000, 0x44000000, 0x44000000, 0x43800000, 0x43800000, 0x43000000, // 0x60-0x6F
	0x43000000, 0x42800000, 0x42800000, 0x42000000, 0x42000000, 0x41800000, 0x41800000, 0x41000000, 0x41000000, 0x40800000, 0x40800000, 0x40000000, 0x40000000, 0x3F800000, 0x3F800000, 0x3F000000, // 0x70-0x7F
	0x3F000000, 0x3E800000, 0x3E800000, 0x3E000000, 0x3E000000, 0x3D800000, 0x3D800000, 0x3D000000, 0x3D000000, 0x3C800000, 0x3C800000, 0x3C000000, 0x3C000000, 0x3B800000, 0x3B800000, 0x3B000000, // 0x80-0x8F
	0x3B000000, 0x3A800000, 0x3A800000, 0x3A000000, 0x3A000000, 0x39800000, 0x39800000, 0x39000000, 0x39000000, 0x38800000, 0x38800000, 0x38000000, 0x38000000, 0x37800000, 0x37800000, 0x37000000, // 0x90-0x9F
	0x37000000, 0x36800000, 0x36800000, 0x36000000, 0x36000000, 0x35800000, 0x35800000, 0x35000000, 0x35000000, 0x34800000, 0x34800000, 0x34000000, 0x34000000, 0x33800000, 0x33800000, 0x33000000, // 0xA0-0xAF
	0x33000000, 0x32800000, 0x32800000, 0x32000000, 0x32000000, 0x31800000, 0x31800000, 0x31000000, 0x31000000, 0x30800000, 0x30800000, 0x30000000, 0x30000000, 0x2F800000, 0x2F800000, 0x2F000000, // 0xB0-0xBF
	0x2F000000, 0x2E800000, 0x2E800000, 0x2E000000, 0x2E000000, 0x2D800000, 0x2D800000, 0x2D000000, 0x2D000000, 0x2C800000, 0x2C800000, 0x2C000000, 0x2C000000, 0x2B800000, 0x2B800000, 0x2B000000, // 0xC0-0xCF
	0x2B000000, 0x2A800000, 0x2A800000, 0x2A000000, 0x2A000000, 0x29800000, 0x29800000, 0x29000000, 0x29000000, 0x28800000, 0x28800000, 0x28000000, 0x28000000, 0x27800000, 0x27800000, 0x27000000, // 0xD0-0xDF
	0x27000000, 0x26800000, 0x26800000, 0x26000000, 0x26000000, 0x25800000, 0x25800000, 0x25000000, 0x25000000, 0x24800000, 0x24800000, 0x24000000, 0x24000000, 0x23800000, 0x23800000, 0x23000000, // 0xE0-0xEF
	0x23000000, 0x22800000, 0x22800000, 0x22000000, 0x22000000, 0x21800000, 0x21800000, 0x21000000, 0x21000000, 0x20800000, 0x20800000, 0x20000000, 0x20000000, 0x1F800000, 0x1F800000, 0x1F000000  // 0xF0-0xFF
};
|
||||
|
||||
using spu_rdata_t = decltype(spu_thread::rdata);
|
||||
|
||||
template <>
|
||||
|
@ -24,6 +24,12 @@ namespace utils
|
||||
class shm;
|
||||
}
|
||||
|
||||
// LUTs for SPU: constant lookup tables shared by the FREST/FRSQEST implementations.
// The fraction tables are indexed with (operand >> 18) masked to the table size,
// the exponent tables with (operand >> 23) & 0xFF.
|
||||
// FREST mantissa bits
extern const u32 spu_frest_fraction_lut[32];
|
||||
// FREST exponent bits
extern const u32 spu_frest_exponent_lut[256];
|
||||
// FRSQEST mantissa bits
extern const u32 spu_frsqest_fraction_lut[64];
|
||||
// FRSQEST exponent bits
extern const u32 spu_frsqest_exponent_lut[256];
|
||||
|
||||
// JIT Block
|
||||
using spu_function_t = void(*)(spu_thread&, void*, u8*);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user