mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 17:11:23 +00:00
PPU Precise/LLVM: Support NJ modes (#8617)
This commit is contained in:
parent
3354c800d7
commit
917069e31a
@ -359,6 +359,8 @@ public:
|
||||
}
|
||||
const g_ppu_scale_table;
|
||||
|
||||
constexpr u32 ppu_inf_u32 = 0x7F800000u;
|
||||
static const f32 ppu_inf_f32 = std::bit_cast<f32>(ppu_inf_u32);
|
||||
constexpr u32 ppu_nan_u32 = 0x7FC00000u;
|
||||
static const f32 ppu_nan_f32 = std::bit_cast<f32>(ppu_nan_u32);
|
||||
static const v128 ppu_vec_nans = v128::from32p(ppu_nan_u32);
|
||||
@ -403,6 +405,14 @@ v128 vec_handle_nan(__m128 result, Args... args)
|
||||
return vec_handle_nan(v128::fromF(result), v128::fromF(args)...);
|
||||
}
|
||||
|
||||
// Flush denormals to zero if NJ is 1
|
||||
inline v128 vec_handle_denormal(ppu_thread& ppu, v128 a)
|
||||
{
|
||||
const auto mask = v128::from32p(ppu.jm_mask);
|
||||
const auto nz = v128::fromV(_mm_srli_epi32(v128::eq32(mask & a, v128{}).vi, 1));
|
||||
return v128::andnot(nz, a);
|
||||
}
|
||||
|
||||
bool ppu_interpreter::MFVSCR(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.vr[op.vd] = v128::from32(0, 0, 0, u32{ppu.sat} | (u32{ppu.nj} << 16));
|
||||
@ -414,6 +424,7 @@ bool ppu_interpreter::MTVSCR(ppu_thread& ppu, ppu_opcode_t op)
|
||||
const u32 vscr = ppu.vr[op.vb]._u32[3];
|
||||
ppu.sat = (vscr & 1) != 0;
|
||||
ppu.nj = (vscr & 0x10000) != 0;
|
||||
ppu.jm_mask = ppu.nj ? ppu_inf_u32 : 0x7fff'ffff;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -427,10 +438,10 @@ bool ppu_interpreter::VADDCUW(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter::VADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = ppu.vr[op.va];
|
||||
const auto b = ppu.vr[op.vb];
|
||||
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
|
||||
const auto result = v128::addfs(a, b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -958,26 +969,26 @@ bool ppu_interpreter::VLOGEFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter_fast::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = ppu.vr[op.va].vf;
|
||||
const auto b = ppu.vr[op.vb].vf;
|
||||
const auto c = ppu.vr[op.vc].vf;
|
||||
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]).vf;
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
|
||||
const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]).vf;
|
||||
const auto result = _mm_add_ps(_mm_mul_ps(a, c), b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ppu_interpreter_precise::VMADDFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = ppu.vr[op.va];
|
||||
const auto b = ppu.vr[op.vb];
|
||||
const auto c = ppu.vr[op.vc];
|
||||
ppu.vr[op.rd] = vec_handle_nan(v128::fma32f(a, c, b), a, b, c);
|
||||
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
|
||||
const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]);
|
||||
ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(v128::fma32f(a, c, b), a, b, c));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ppu_interpreter::VMAXFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.vr[op.vd] = vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf));
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(_mm_max_ps(ppu.vr[op.va].vf, ppu.vr[op.vb].vf)));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1123,7 +1134,7 @@ bool ppu_interpreter::VMINFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
const auto a = ppu.vr[op.va].vf;
|
||||
const auto b = ppu.vr[op.vb].vf;
|
||||
const auto result = _mm_or_ps(_mm_min_ps(a, b), _mm_min_ps(b, a));
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1463,18 +1474,18 @@ bool ppu_interpreter_fast::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
const auto a = _mm_sub_ps(_mm_mul_ps(ppu.vr[op.va].vf, ppu.vr[op.vc].vf), ppu.vr[op.vb].vf);
|
||||
const auto b = _mm_set1_ps(-0.0f);
|
||||
const auto result = _mm_xor_ps(a, b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Precise-interpreter VNMSUBFP: negated multiply-subtract on four f32 lanes.
// The result is built as -(fma32f(va, vc, -vb)); sign flips are done by XOR with -0.0f.
// All three source operands and the final result pass through vec_handle_denormal,
// and NaN results are processed by vec_handle_nan. Always returns true.
bool ppu_interpreter_precise::VNMSUBFP(ppu_thread& ppu, ppu_opcode_t op)
{
	// Sign-bit mask used to negate values via XOR
	const auto m = _mm_set1_ps(-0.0f);

	const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
	const auto c = vec_handle_denormal(ppu, ppu.vr[op.vc]);

	// vb must be flushed like va/vc (as precise VMADDFP does for its addend);
	// vec_handle_denormal keeps the sign bit, so flushing before the negation is safe.
	const auto b = v128::fromF(_mm_xor_ps(vec_handle_denormal(ppu, ppu.vr[op.vb]).vf, m));

	// Negate the fused result
	const auto r = v128::fromF(_mm_xor_ps(v128::fma32f(a, c, b).vf, m));

	ppu.vr[op.rd] = vec_handle_denormal(ppu, vec_handle_nan(r, a, b, c));
	return true;
}
|
||||
|
||||
@ -1874,15 +1885,15 @@ bool ppu_interpreter_precise::VPKUWUS(ppu_thread& ppu, ppu_opcode_t op)
|
||||
bool ppu_interpreter::VREFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f);
|
||||
const auto b = ppu.vr[op.vb].vf;
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
|
||||
const auto result = _mm_div_ps(a, b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto b = ppu.vr[op.vb];
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
|
||||
v128 d;
|
||||
|
||||
for (uint w = 0; w < 4; w++)
|
||||
@ -1890,7 +1901,7 @@ bool ppu_interpreter::VRFIM(ppu_thread& ppu, ppu_opcode_t op)
|
||||
d._f[w] = std::floor(b._f[w]);
|
||||
}
|
||||
|
||||
ppu.vr[op.vd] = vec_handle_nan(d, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1904,13 +1915,13 @@ bool ppu_interpreter::VRFIN(ppu_thread& ppu, ppu_opcode_t op)
|
||||
d._f[w] = std::nearbyint(b._f[w]);
|
||||
}
|
||||
|
||||
ppu.vr[op.vd] = vec_handle_nan(d, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto b = ppu.vr[op.vb];
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
|
||||
v128 d;
|
||||
|
||||
for (uint w = 0; w < 4; w++)
|
||||
@ -1918,7 +1929,7 @@ bool ppu_interpreter::VRFIP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
d._f[w] = std::ceil(b._f[w]);
|
||||
}
|
||||
|
||||
ppu.vr[op.vd] = vec_handle_nan(d, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1932,7 +1943,7 @@ bool ppu_interpreter::VRFIZ(ppu_thread& ppu, ppu_opcode_t op)
|
||||
d._f[w] = std::truncf(b._f[w]);
|
||||
}
|
||||
|
||||
ppu.vr[op.vd] = vec_handle_nan(d, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(d, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1978,9 +1989,9 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op)
|
||||
bool ppu_interpreter::VRSQRTEFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = _mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f);
|
||||
const auto b = ppu.vr[op.vb].vf;
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]).vf;
|
||||
const auto result = _mm_div_ps(a, _mm_sqrt_ps(b));
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2277,10 +2288,10 @@ bool ppu_interpreter::VSUBCUW(ppu_thread& ppu, ppu_opcode_t op)
|
||||
|
||||
bool ppu_interpreter::VSUBFP(ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const auto a = ppu.vr[op.va];
|
||||
const auto b = ppu.vr[op.vb];
|
||||
const auto a = vec_handle_denormal(ppu, ppu.vr[op.va]);
|
||||
const auto b = vec_handle_denormal(ppu, ppu.vr[op.vb]);
|
||||
const auto result = v128::subfs(a, b);
|
||||
ppu.vr[op.vd] = vec_handle_nan(result, a, b);
|
||||
ppu.vr[op.vd] = vec_handle_denormal(ppu, vec_handle_nan(result, a, b));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1613,6 +1613,7 @@ extern void ppu_initialize(const ppu_module& info)
|
||||
non_win32,
|
||||
accurate_fma,
|
||||
accurate_ppu_vector_nan,
|
||||
java_mode_handling,
|
||||
|
||||
__bitset_enum_max
|
||||
};
|
||||
@ -1630,6 +1631,10 @@ extern void ppu_initialize(const ppu_module& info)
|
||||
{
|
||||
settings += ppu_settings::accurate_ppu_vector_nan;
|
||||
}
|
||||
if (g_cfg.core.llvm_ppu_jm_handling)
|
||||
{
|
||||
settings += ppu_settings::java_mode_handling;
|
||||
}
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
fmt::append(obj_name, "v3-tane-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
|
@ -186,7 +186,10 @@ public:
|
||||
exception, the corresponding element in the target vr is cleared to '0'. In both cases, the '0'
|
||||
has the same sign as the denormalized or underflowing value.
|
||||
*/
|
||||
bool nj = false;
|
||||
bool nj = true;
|
||||
|
||||
// Optimization: precomputed java-mode mask for handling denormals
|
||||
u32 jm_mask = 0x7f80'0000;
|
||||
|
||||
u32 raddr{0}; // Reservation addr
|
||||
u64 rtime{0};
|
||||
|
@ -46,6 +46,8 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
||||
thread_struct.insert(thread_struct.end(), 3, GetType<bool>()); // so, ov, ca
|
||||
thread_struct.insert(thread_struct.end(), 1, GetType<u8>()); // cnt
|
||||
thread_struct.insert(thread_struct.end(), 2, GetType<bool>()); // sat, nj
|
||||
thread_struct.emplace_back(ArrayType::get(GetType<char>(), 2)); // Padding
|
||||
thread_struct.insert(thread_struct.end(), 1, GetType<u32>()); // jm_mask
|
||||
|
||||
m_thread_type = StructType::create(m_context, thread_struct, "context_t");
|
||||
|
||||
@ -231,6 +233,25 @@ Value* PPUTranslator::VecHandleNan(Value* val)
|
||||
return val;
|
||||
}
|
||||
|
||||
// Emit IR that clears every non-sign bit of each 32-bit lane of `val` whose
// bits selected by the runtime m_jm_mask value are all zero; other lanes pass
// through unchanged. The value is processed as u32[4] and cast back to its
// incoming type before being returned.
Value* PPUTranslator::VecHandleDenormal(Value* val)
{
	const auto orig_type = val->getType();
	const auto int_type = GetType<u32[4]>();
	const bool is_int = orig_type == int_type;

	// Work on the raw 32-bit lane patterns
	Value* bits = val;

	if (!is_int)
	{
		bits = m_ir->CreateBitCast(val, int_type);
	}

	// Splat the loaded mask across four lanes and test the selected bits for zero
	const auto jm_mask = Broadcast(RegLoad(m_jm_mask), 4);
	const auto masked = m_ir->CreateAnd(bits, jm_mask);
	const auto zeros = ConstantVector::getSplat(4, m_ir->getInt32(0));
	const auto flush_lanes = SExt(m_ir->CreateICmpEQ(masked, zeros), GetType<s32[4]>());

	// Logical shift by one turns the all-ones lanes into 0x7FFFFFFF, sparing the sign bit
	const auto clear_bits = m_ir->CreateLShr(flush_lanes, 1);
	const auto kept = m_ir->CreateAnd(m_ir->CreateNot(clear_bits), bits);

	if (is_int)
	{
		return kept;
	}

	return m_ir->CreateBitCast(kept, orig_type);
}
|
||||
|
||||
// Post-process a vector float result according to the active core settings:
// NaN handling first (if "accurate vector NaN" is enabled), then denormal
// handling (if Java-mode handling is enabled). With both disabled, `val` is
// returned untouched.
Value* PPUTranslator::VecHandleResult(Value* val)
{
	if (g_cfg.core.llvm_ppu_accurate_vector_nan)
	{
		val = VecHandleNan(val);
	}

	if (g_cfg.core.llvm_ppu_jm_handling)
	{
		val = VecHandleDenormal(val);
	}

	return val;
}
|
||||
|
||||
Value* PPUTranslator::GetAddr(u64 _add)
|
||||
{
|
||||
if (m_reloc)
|
||||
@ -609,7 +630,9 @@ void PPUTranslator::MFVSCR(ppu_opcode_t op)
|
||||
void PPUTranslator::MTVSCR(ppu_opcode_t op)
|
||||
{
|
||||
const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0));
|
||||
RegStore(Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>()), m_nj);
|
||||
const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>());
|
||||
RegStore(nj, m_nj);
|
||||
if (g_cfg.core.llvm_ppu_jm_handling) RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask);
|
||||
RegStore(Trunc(vscr, GetType<bool>()), m_sat);
|
||||
}
|
||||
|
||||
@ -625,7 +648,7 @@ void PPUTranslator::VADDFP(ppu_opcode_t op)
|
||||
const auto a = get_vr<f32[4]>(op.va);
|
||||
const auto b = get_vr<f32[4]>(op.vb);
|
||||
|
||||
set_vr(op.vd, vec_handle_nan(a + b));
|
||||
set_vr(op.vd, vec_handle_result(a + b));
|
||||
}
|
||||
|
||||
void PPUTranslator::VADDSBS(ppu_opcode_t op)
|
||||
@ -930,7 +953,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||
|
||||
if (data == v128{})
|
||||
{
|
||||
set_vr(op.vd, vec_handle_nan(a * c));
|
||||
set_vr(op.vd, vec_handle_result(a * c));
|
||||
ppu_log.notice("LLVM: VMADDFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||
return;
|
||||
}
|
||||
@ -938,7 +961,7 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||
|
||||
if (m_use_fma)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, b.value })));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, b.value })));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -948,13 +971,13 @@ void PPUTranslator::VMADDFP(ppu_opcode_t op)
|
||||
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
||||
|
||||
const auto xr = m_ir->CreateCall(get_intrinsic<f64[4]>(llvm::Intrinsic::fmuladd), {xa, xc, xb});
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
|
||||
}
|
||||
|
||||
void PPUTranslator::VMAXFP(ppu_opcode_t op)
|
||||
{
|
||||
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOGT(ab[0], ab[1]), ab[0], ab[1])));
|
||||
}
|
||||
|
||||
void PPUTranslator::VMAXSB(ppu_opcode_t op)
|
||||
@ -1026,7 +1049,7 @@ void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
|
||||
void PPUTranslator::VMINFP(ppu_opcode_t op)
|
||||
{
|
||||
const auto ab = GetVrs(VrType::vf, op.va, op.vb);
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateSelect(m_ir->CreateFCmpOLT(ab[0], ab[1]), ab[0], ab[1])));
|
||||
}
|
||||
|
||||
void PPUTranslator::VMINSB(ppu_opcode_t op)
|
||||
@ -1236,7 +1259,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
|
||||
|
||||
if (data == v128{})
|
||||
{
|
||||
set_vr(op.vd, vec_handle_nan(-a * c));
|
||||
set_vr(op.vd, vec_handle_result(-a * c));
|
||||
ppu_log.notice("LLVM: VNMSUBFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
|
||||
return;
|
||||
}
|
||||
@ -1245,7 +1268,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
|
||||
// Differs from the emulated path with regards to negative zero
|
||||
if (m_use_fma)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) }))));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateFNeg(m_ir->CreateCall(get_intrinsic<f32[4]>(llvm::Intrinsic::fma), { a.value, c.value, m_ir->CreateFNeg(b.value) }))));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1255,7 +1278,7 @@ void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
|
||||
const auto xc = m_ir->CreateFPExt(c.value, get_type<f64[4]>());
|
||||
|
||||
const auto xr = m_ir->CreateFNeg(m_ir->CreateFSub(m_ir->CreateFMul(xa, xc), xb));
|
||||
SetVr(op.vd, VecHandleNan(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
|
||||
SetVr(op.vd, VecHandleResult(m_ir->CreateFPTrunc(xr, get_type<f32[4]>())));
|
||||
}
|
||||
|
||||
void PPUTranslator::VNOR(ppu_opcode_t op)
|
||||
@ -1361,28 +1384,28 @@ void PPUTranslator::VPKUWUS(ppu_opcode_t op)
|
||||
|
||||
void PPUTranslator::VREFP(ppu_opcode_t op)
|
||||
{
|
||||
const auto result = VecHandleNan(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), GetVr(op.vb, VrType::vf)));
|
||||
const auto result = VecHandleResult(m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), GetVr(op.vb, VrType::vf)));
|
||||
SetVr(op.vd, result);
|
||||
}
|
||||
|
||||
void PPUTranslator::VRFIM(ppu_opcode_t op)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.floor.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
}
|
||||
|
||||
void PPUTranslator::VRFIN(ppu_opcode_t op)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.nearbyint.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
}
|
||||
|
||||
void PPUTranslator::VRFIP(ppu_opcode_t op)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.ceil.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
}
|
||||
|
||||
void PPUTranslator::VRFIZ(ppu_opcode_t op)
|
||||
{
|
||||
SetVr(op.vd, VecHandleNan(Call(GetType<f32[4]>(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
SetVr(op.vd, VecHandleResult(Call(GetType<f32[4]>(), "llvm.trunc.v4f32", GetVr(op.vb, VrType::vf))));
|
||||
}
|
||||
|
||||
void PPUTranslator::VRLB(ppu_opcode_t op)
|
||||
@ -1407,7 +1430,7 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)
|
||||
{
|
||||
const auto result = m_ir->CreateFDiv(ConstantVector::getSplat(4, ConstantFP::get(GetType<f32>(), 1.0)), Call(GetType<f32[4]>(), "llvm.sqrt.v4f32", GetVr(op.vb, VrType::vf)));
|
||||
|
||||
SetVr(op.vd, VecHandleNan(result));
|
||||
SetVr(op.vd, VecHandleResult(result));
|
||||
}
|
||||
|
||||
void PPUTranslator::VSEL(ppu_opcode_t op)
|
||||
@ -1565,7 +1588,7 @@ void PPUTranslator::VSUBFP(ppu_opcode_t op)
|
||||
{
|
||||
const auto a = get_vr<f32[4]>(op.va);
|
||||
const auto b = get_vr<f32[4]>(op.vb);
|
||||
SetVr(op.vd, VecHandleNan(eval(a - b).eval(m_ir)));
|
||||
SetVr(op.vd, VecHandleResult(eval(a - b).eval(m_ir)));
|
||||
}
|
||||
|
||||
void PPUTranslator::VSUBSBS(ppu_opcode_t op)
|
||||
|
@ -52,9 +52,9 @@ class PPUTranslator final : public cpu_translator
|
||||
|
||||
llvm::Value* m_mtocr_table{};
|
||||
|
||||
llvm::Value* m_globals[173];
|
||||
llvm::Value* m_globals[175];
|
||||
llvm::Value** const m_g_cr = m_globals + 99;
|
||||
llvm::Value* m_locals[173];
|
||||
llvm::Value* m_locals[175];
|
||||
llvm::Value** const m_gpr = m_locals + 3;
|
||||
llvm::Value** const m_fpr = m_locals + 35;
|
||||
llvm::Value** const m_vr = m_locals + 67;
|
||||
@ -77,6 +77,7 @@ class PPUTranslator final : public cpu_translator
|
||||
DEF_VALUE(m_cnt, m_g_cnt, 170) // XER.CNT
|
||||
DEF_VALUE(m_sat, m_g_sat, 171) // VSCR.SAT bit, sticky saturation flag
|
||||
DEF_VALUE(m_nj, m_g_nj, 172) // VSCR.NJ bit, non-Java mode
|
||||
DEF_VALUE(m_jm_mask, m_g_jm_mask, 174) // Java-Mode helper mask
|
||||
|
||||
#undef DEF_VALUE
|
||||
public:
|
||||
@ -102,15 +103,14 @@ public:
|
||||
}
|
||||
|
||||
llvm::Value* VecHandleNan(llvm::Value* val);
|
||||
llvm::Value* VecHandleDenormal(llvm::Value* val);
|
||||
llvm::Value* VecHandleResult(llvm::Value* val);
|
||||
|
||||
template <typename T>
|
||||
auto vec_handle_nan(T&& expr)
|
||||
auto vec_handle_result(T&& expr)
|
||||
{
|
||||
value_t<typename T::type> result;
|
||||
if (g_cfg.core.llvm_ppu_accurate_vector_nan)
|
||||
result.value = VecHandleNan(expr.eval(m_ir));
|
||||
else
|
||||
result.value = expr.eval(m_ir);
|
||||
result.value = VecHandleResult(expr.eval(m_ir));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,7 @@ struct cfg_root : cfg::node
|
||||
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
|
||||
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
|
||||
cfg::_bool llvm_accurate_dfma{ this, "LLVM Accurate DFMA", true }; // Enable accurate double-precision FMA for CPUs which do not support it natively
|
||||
cfg::_bool llvm_ppu_jm_handling{ this, "PPU LLVM Java Mode Handling", false }; // Respect current Java Mode for alti-vec ops by PPU LLVM
|
||||
cfg::_bool llvm_ppu_accurate_vector_nan{ this, "PPU LLVM Accurate Vector NaN values", false };
|
||||
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user