Merge pull request #13211 from Sintendo/blendvpd

Jit_FloatingPoint: fselx - Prefer BLENDVPD over VBLENDVPD
2024-12-26 09:27:16 +00:00 · 2024-12-22 18:35:11 -05:00 · 2024-12-22 18:35:11 -05:00 · c528a70e64
commit c528a70e64
parent a1d6aa7d3e 065165f749
1 changed file with 15 additions and 1 deletion
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -644,6 +644,20 @@ void Jit64::fselx(UGeckoInstruction inst)

  if (cpu_info.bAVX)
  {
+    // Prefer BLENDVPD over VBLENDVPD if the latter doesn't save any
+    // instructions.
+    //
+    // VBLENDVPD allows separate source and destination registers, which can
+    // eliminate a MOVAPD/MOVSD. However, on Intel since Skylake, VBLENDVPD
+    // takes additional uops to execute compared to BLENDVPD (according to
+    // https://uops.info). On AMD and older Intel microarchitectures there is no
+    // difference.
+    if (d == c)
+    {
+      BLENDVPD(Rd, Rb);
+      return;
+    }
+
    X64Reg src1 = XMM1;
    if (Rc.IsSimpleReg())
    {
@@ -654,7 +668,7 @@ void Jit64::fselx(UGeckoInstruction inst)
      MOVAPD(XMM1, Rc);
    }

-    if (d == c || packed)
+    if (packed)
    {
      VBLENDVPD(Rd, src1, Rb, XMM0);
      return;