diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 4723c1c2cf..75cfbed3d6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -644,6 +644,20 @@ void Jit64::fselx(UGeckoInstruction inst) if (cpu_info.bAVX) { + // Prefer BLENDVPD over VBLENDVPD if the latter doesn't save any + // instructions. + // + // VBLENDVPD allows separate source and destination registers, which can + // eliminate a MOVAPD/MOVSD. However, on Intel since Skylake, VBLENDVPD + // takes additional uops to execute compared to BLENDVPD (according to + // https://uops.info). On AMD and older Intel microarchitectures there is no + // difference. + if (d == c) + { + BLENDVPD(Rd, Rb); + return; + } + X64Reg src1 = XMM1; if (Rc.IsSimpleReg()) { @@ -654,7 +668,7 @@ void Jit64::fselx(UGeckoInstruction inst) MOVAPD(XMM1, Rc); } - if (d == c || packed) + if (packed) { VBLENDVPD(Rd, src1, Rb, XMM0); return;