JitArm64: Merge ps_mulsX and ps_maddXX

They have a lot of shared code, most notably the code for rounding c.

No behavior change.
This commit is contained in:
JosJuice 2022-10-09 12:14:47 +02:00
parent 2153276ab9
commit 554a2fd332
3 changed files with 27 additions and 64 deletions

View File

@ -152,9 +152,8 @@ public:
void frsqrtex(UGeckoInstruction inst);
// Paired
void ps_maddXX(UGeckoInstruction inst);
void ps_mergeXX(UGeckoInstruction inst);
void ps_mulsX(UGeckoInstruction inst);
void ps_arith(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst);
void ps_sumX(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);

View File

@ -73,55 +73,7 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
"Register allocation turned singles into doubles in the middle of ps_mergeXX");
}
// Emits ARM64 code for the paired-single multiply-by-scalar instructions
// ps_muls0 (SUBOP5 == 12) and ps_muls1 (SUBOP5 == 13): d = a * c.ps0 or
// d = a * c.ps1 respectively.
void JitArm64::ps_mulsX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
// Not emitted here when the Rc bit is set or when jo.fp_exceptions is on.
// NOTE(review): FALLBACK_IF presumably routes the instruction to a non-JIT
// fallback - confirm against the macro definition.
FALLBACK_IF(inst.Rc);
FALLBACK_IF(jo.fp_exceptions);
const u32 a = inst.FA;
const u32 c = inst.FC;
const u32 d = inst.FD;
// true for ps_muls1 (multiply by c.ps1), false for ps_muls0 (multiply by c.ps0).
const bool upper = inst.SUBOP5 == 13;
// Work on 32-bit lanes only if the register cache reports both inputs as singles.
const bool singles = fpr.IsSingle(a) && fpr.IsSingle(c);
// c has to be rounded first unless the block analysis marked FC as single.
const bool round_c = !js.op->fprIsSingle[inst.FC];
const RegType type = singles ? RegType::Single : RegType::Register;
const u8 size = singles ? 32 : 64;
const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
const ARM64Reg VA = fpr.R(a, type);
ARM64Reg VC = fpr.R(c, type);
const ARM64Reg VD = fpr.RW(d, type);
// Temporary that holds the rounded copy of c; stays INVALID_REG when no
// rounding is needed so that the release below can be skipped.
ARM64Reg V0Q = ARM64Reg::INVALID_REG;
if (round_c)
{
// Rounding is only expected on the non-single path.
ASSERT_MSG(DYNA_REC, !singles, "Tried to apply 25-bit precision to single");
V0Q = fpr.GetReg();
Force25BitPrecision(reg_encoder(V0Q), reg_encoder(VC));
// From here on the multiply reads the rounded copy instead of the original c.
VC = reg_encoder(V0Q);
}
// Multiply a by one lane of c; the last argument is the lane index
// (0 for ps_muls0, 1 for ps_muls1).
m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
// Release the temporary if one was taken for rounding.
if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);
// Sanity check: the single/double state of the inputs sampled at the top must
// still hold, otherwise `size` and `reg_encoder` no longer match the registers.
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_mulsX");
// NOTE(review): presumably rounds/marks the result in d as single precision and
// updates FPRF from VD when required - confirm against the helper definitions.
fpr.FixSinglePrecision(d);
SetFPRFIfNeeded(true, VD);
}
void JitArm64::ps_maddXX(UGeckoInstruction inst)
void JitArm64::ps_arith(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
@ -134,15 +86,21 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
const u32 d = inst.FD;
const u32 op5 = inst.SUBOP5;
const bool use_b = (op5 & ~0x1) != 12; // muls uses no B
const auto singles_func = [&] {
return fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && fpr.IsSingle(c);
};
const bool singles = singles_func();
const bool inaccurate_fma = !Config::Get(Config::SESSION_USE_FMA);
const bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c);
const bool round_c = !js.op->fprIsSingle[inst.FC];
const RegType type = singles ? RegType::Single : RegType::Register;
const u8 size = singles ? 32 : 64;
const auto reg_encoder = singles ? EncodeRegToDouble : EncodeRegToQuad;
const ARM64Reg VA = reg_encoder(fpr.R(a, type));
const ARM64Reg VB = reg_encoder(fpr.R(b, type));
const ARM64Reg VB = use_b ? reg_encoder(fpr.R(b, type)) : ARM64Reg::INVALID_REG;
ARM64Reg VC = reg_encoder(fpr.R(c, type));
const ARM64Reg VD = reg_encoder(fpr.RW(d, type));
@ -178,6 +136,12 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
ARM64Reg result_reg = VD;
switch (op5)
{
case 12: // ps_muls0: d = a * c.ps0
m_float_emit.FMUL(size, VD, VA, VC, 0);
break;
case 13: // ps_muls1: d = a * c.ps1
m_float_emit.FMUL(size, VD, VA, VC, 1);
break;
case 14: // ps_madds0: d = a * c.ps0 + b
if (inaccurate_fma)
{
@ -269,7 +233,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
}
break;
default:
ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
ASSERT_MSG(DYNA_REC, 0, "ps_arith - invalid op");
break;
}
@ -292,8 +256,8 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
if (V1Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V1Q);
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
"Register allocation turned singles into doubles in the middle of ps_maddXX");
ASSERT_MSG(DYNA_REC, singles == singles_func(),
"Register allocation turned singles into doubles in the middle of ps_arith");
fpr.FixSinglePrecision(d);

View File

@ -108,10 +108,10 @@ constexpr std::array<GekkoOPTemplate, 13> table4{{
constexpr std::array<GekkoOPTemplate, 17> table4_2{{
{10, &JitArm64::ps_sumX}, // ps_sum0
{11, &JitArm64::ps_sumX}, // ps_sum1
{12, &JitArm64::ps_mulsX}, // ps_muls0
{13, &JitArm64::ps_mulsX}, // ps_muls1
{14, &JitArm64::ps_maddXX}, // ps_madds0
{15, &JitArm64::ps_maddXX}, // ps_madds1
{12, &JitArm64::ps_arith}, // ps_muls0
{13, &JitArm64::ps_arith}, // ps_muls1
{14, &JitArm64::ps_arith}, // ps_madds0
{15, &JitArm64::ps_arith}, // ps_madds1
{18, &JitArm64::fp_arith}, // ps_div
{20, &JitArm64::fp_arith}, // ps_sub
{21, &JitArm64::fp_arith}, // ps_add
@ -119,10 +119,10 @@ constexpr std::array<GekkoOPTemplate, 17> table4_2{{
{24, &JitArm64::ps_res}, // ps_res
{25, &JitArm64::fp_arith}, // ps_mul
{26, &JitArm64::ps_rsqrte}, // ps_rsqrte
{28, &JitArm64::ps_maddXX}, // ps_msub
{29, &JitArm64::ps_maddXX}, // ps_madd
{30, &JitArm64::ps_maddXX}, // ps_nmsub
{31, &JitArm64::ps_maddXX}, // ps_nmadd
{28, &JitArm64::ps_arith}, // ps_msub
{29, &JitArm64::ps_arith}, // ps_madd
{30, &JitArm64::ps_arith}, // ps_nmsub
{31, &JitArm64::ps_arith}, // ps_nmadd
}};
constexpr std::array<GekkoOPTemplate, 4> table4_3{{