From b84a0704cdf7dd64afa3b27439e7d029baefa648 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 25 Jul 2021 10:58:18 +0200 Subject: [PATCH] Revert "Jit: Fix correctness issue in dcbf/dcbi/dcbst" This reverts commit 66b992cfe441fedd7f21827946873bf4c0238b6f. A new (additional) correctness issue was revealed in the old AArch64 code when applying it on top of modern JitArm64: LSR was being used when LSRV was intended. This commit uses LSRV. --- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 20 +++++++++++-- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 30 ++++++++++++++++--- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 5 ++++ Source/Core/Core/PowerPC/JitCommon/JitCache.h | 26 ++++++++-------- 4 files changed, 63 insertions(+), 18 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 6b8052a48c..a0e555770d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -234,21 +234,37 @@ void Jit64::dcbx(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); X64Reg addr = RSCRATCH; + X64Reg value = RSCRATCH2; RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0); RCOpArg Rb = gpr.Use(inst.RB, RCMode::Read); - RegCache::Realize(Ra, Rb); + RCX64Reg tmp = gpr.Scratch(); + RegCache::Realize(Ra, Rb, tmp); MOV_sum(32, addr, Ra, Rb); - AND(32, R(addr), Imm8(~31)); + // Check whether a JIT cache line needs to be invalidated. 
+ LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits) + SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset + MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet())); + MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0)); + SHR(32, R(addr), Imm8(5)); + BT(32, R(value), R(addr)); + + FixupBranch c = J_CC(CC_C, true); + SwitchToFarCode(); + SetJumpTarget(c); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); MOV(32, R(ABI_PARAM1), R(addr)); + SHL(32, R(ABI_PARAM1), Imm8(5)); MOV(32, R(ABI_PARAM2), Imm32(32)); XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); ABI_CallFunction(JitInterface::InvalidateICache); ABI_PopRegistersAndAdjustStack(registersInUse, 0); asm_routines.ResetStack(*this); + c = J(true); + SwitchToNearCode(); + SetJumpTarget(c); } void Jit64::dcbt(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index c5e3d7f6fa..0673b7ce22 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -539,9 +539,11 @@ void JitArm64::dcbx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITLoadStoreOff); - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W30); - ARM64Reg addr = ARM64Reg::W0; + ARM64Reg addr = gpr.GetReg(); + ARM64Reg value = gpr.GetReg(); + ARM64Reg WA = ARM64Reg::W30; u32 a = inst.RA, b = inst.RB; @@ -550,7 +552,21 @@ void JitArm64::dcbx(UGeckoInstruction inst) else MOV(addr, gpr.R(b)); - AND(addr, addr, LogicalImm(~31, 32)); // mask sizeof cacheline + // Check whether a JIT cache line needs to be invalidated. 
+ AND(value, addr, LogicalImm(0x1ffffc00, 32)); // upper three bits and last 10 bit are masked for + // the bitset of cachelines, 0x1ffffc00 + LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset + MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet()); + LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true)); + + LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset + + LSRV(value, value, addr); // move current bit to bit 0 + + FixupBranch bit_not_set = TBZ(value, 0); + FixupBranch far_addr = B(); + SwitchToFarCode(); + SetJumpTarget(far_addr); BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); @@ -558,6 +574,7 @@ void JitArm64::dcbx(UGeckoInstruction inst) ABI_PushRegisters(gprs_to_push); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); + LSL(ARM64Reg::W0, addr, 5); MOVI2R(ARM64Reg::X1, 32); MOVI2R(ARM64Reg::X2, 0); MOVP2R(ARM64Reg::X3, &JitInterface::InvalidateICache); @@ -566,7 +583,12 @@ void JitArm64::dcbx(UGeckoInstruction inst) m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); ABI_PopRegisters(gprs_to_push); - gpr.Unlock(ARM64Reg::W0); + FixupBranch near_addr = B(); + SwitchToNearCode(); + SetJumpTarget(bit_not_set); + SetJumpTarget(near_addr); + + gpr.Unlock(addr, value, ARM64Reg::W30); } void JitArm64::dcbt(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 0a6e9bf07c..898a87b0ef 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -269,6 +269,11 @@ void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length) } } +u32* JitBaseBlockCache::GetBlockBitSet() const +{ + return valid_block.m_valid_block.get(); +} + void JitBaseBlockCache::WriteDestroyBlock(const JitBlock& block) { } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h 
b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index bf8de09f9b..35956e471e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -99,18 +99,6 @@ typedef void (*CompiledCode)(); class ValidBlockBitSet final { public: - ValidBlockBitSet() - { - m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]); - ClearAll(); - } - - void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); } - void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); } - void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); } - bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; } - -private: enum { // ValidBlockBitSet covers the whole 32-bit address-space in 32-byte @@ -121,7 +109,19 @@ private: // The number of elements in the allocated array. Each u32 contains 32 bits. VALID_BLOCK_ALLOC_ELEMENTS = VALID_BLOCK_MASK_SIZE / 32 }; + // Directly accessed by Jit64. std::unique_ptr<u32[]> m_valid_block; + + ValidBlockBitSet() + { + m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]); + ClearAll(); + } + + void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); } + void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); } + void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); } + bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; } }; class JitBaseBlockCache @@ -163,6 +163,8 @@ public: void InvalidateICache(u32 address, u32 length, bool forced); void ErasePhysicalRange(u32 address, u32 length); + u32* GetBlockBitSet() const; + protected: virtual void DestroyBlock(JitBlock& block);