diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt
index d1ec894a70..e35b7cd746 100644
--- a/Source/Core/Common/CMakeLists.txt
+++ b/Source/Core/Common/CMakeLists.txt
@@ -29,7 +29,6 @@ set(SRCS Analytics.cpp
TraversalClient.cpp
Version.cpp
x64ABI.cpp
- x64Analyzer.cpp
x64Emitter.cpp
Crypto/bn.cpp
Crypto/ec.cpp
diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj
index 101b082f0e..20edab8a65 100644
--- a/Source/Core/Common/Common.vcxproj
+++ b/Source/Core/Common/Common.vcxproj
@@ -133,7 +133,6 @@
- <ClCompile Include="x64Analyzer.cpp" />
@@ -178,7 +177,6 @@
- <ClInclude Include="x64Analyzer.h" />
diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters
index 6bb69f6a17..634730bf0a 100644
--- a/Source/Core/Common/Common.vcxproj.filters
+++ b/Source/Core/Common/Common.vcxproj.filters
@@ -62,7 +62,6 @@
- <ClCompile Include="x64Analyzer.cpp" />
<Filter>Logging</Filter>
@@ -253,7 +252,6 @@
- <ClInclude Include="x64Analyzer.h" />
diff --git a/Source/Core/Common/x64Analyzer.cpp b/Source/Core/Common/x64Analyzer.cpp
deleted file mode 100644
index 773f6ebdbc..0000000000
--- a/Source/Core/Common/x64Analyzer.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "Common/x64Analyzer.h"
-
-bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info)
-{
- unsigned const char* startCodePtr = codePtr;
- u8 rex = 0;
- u32 opcode;
- int opcode_length;
-
- // Check for regular prefix
- info->operandSize = 4;
- info->zeroExtend = false;
- info->signExtend = false;
- info->hasImmediate = false;
- info->isMemoryWrite = false;
- info->byteSwap = false;
-
- u8 modRMbyte = 0;
- u8 sibByte = 0;
- bool hasModRM = false;
-
- int displacementSize = 0;
-
- if (*codePtr == 0x66)
- {
- info->operandSize = 2;
- codePtr++;
- }
- else if (*codePtr == 0x67)
- {
- codePtr++;
- }
-
- // Check for REX prefix
- if ((*codePtr & 0xF0) == 0x40)
- {
- rex = *codePtr;
- if (rex & 8) // REX.W
- {
- info->operandSize = 8;
- }
- codePtr++;
- }
-
- opcode = *codePtr++;
- opcode_length = 1;
- if (opcode == 0x0F)
- {
- opcode = (opcode << 8) | *codePtr++;
- opcode_length = 2;
- if ((opcode & 0xFB) == 0x38)
- {
- opcode = (opcode << 8) | *codePtr++;
- opcode_length = 3;
- }
- }
-
- switch (opcode_length)
- {
- case 1:
- if ((opcode & 0xF0) == 0x80 || ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
- {
- modRMbyte = *codePtr++;
- hasModRM = true;
- }
- break;
- case 2:
- if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
- ((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) || (opcode & 0xF0) == 0x30 ||
- (opcode & 0xFF) == 0x77 || (opcode & 0xF0) == 0x80 || (opcode & 0xF8) == 0xC8)
- {
- // No mod R/M byte
- }
- else
- {
- modRMbyte = *codePtr++;
- hasModRM = true;
- }
- break;
- case 3:
- // TODO: support more 3-byte opcode instructions
- if ((opcode & 0xFE) == 0xF0)
- {
- modRMbyte = *codePtr++;
- hasModRM = true;
- }
- break;
- }
-
- if (hasModRM)
- {
- ModRM mrm(modRMbyte, rex);
- info->regOperandReg = mrm.reg;
- if (mrm.mod < 3)
- {
- if (mrm.rm == 4)
- {
- // SIB byte
- sibByte = *codePtr++;
- info->scaledReg = (sibByte >> 3) & 7;
- info->otherReg = (sibByte & 7);
- if (rex & 2)
- info->scaledReg += 8;
- if (rex & 1)
- info->otherReg += 8;
- }
- else
- {
- // info->scaledReg =
- }
- }
- if (mrm.mod == 1 || mrm.mod == 2)
- {
- if (mrm.mod == 1)
- displacementSize = 1;
- else
- displacementSize = 4;
- }
- }
-
- if (displacementSize == 1)
- info->displacement = (s32)(s8)*codePtr;
- else
- info->displacement = *((s32*)codePtr);
- codePtr += displacementSize;
-
- switch (opcode)
- {
- case 0xC6: // mem <- imm8
- info->isMemoryWrite = true;
- info->hasImmediate = true;
- info->immediate = *codePtr;
- info->operandSize = 1;
- codePtr++;
- break;
-
- case 0xC7: // mem <- imm16/32
- info->isMemoryWrite = true;
- switch (info->operandSize)
- {
- case 2:
- info->hasImmediate = true;
- info->immediate = *(u16*)codePtr;
- codePtr += 2;
- break;
-
- case 4:
- info->hasImmediate = true;
- info->immediate = *(u32*)codePtr;
- codePtr += 4;
- break;
-
- case 8:
- info->zeroExtend = true;
- info->immediate = *(u32*)codePtr;
- codePtr += 4;
- break;
- }
- break;
-
- case 0x88: // mem <- r8
- info->isMemoryWrite = true;
- if (info->operandSize != 4)
- {
- return false;
- }
- info->operandSize = 1;
- break;
-
- case 0x89: // mem <- r16/32/64
- info->isMemoryWrite = true;
- break;
-
- case 0x8A: // r8 <- mem
- if (info->operandSize != 4)
- {
- return false;
- }
- info->operandSize = 1;
- break;
-
- case 0x8B: // r16/32/64 <- mem
- break;
-
- case 0x0FB6: // movzx on byte
- info->zeroExtend = true;
- info->operandSize = 1;
- break;
-
- case 0x0FB7: // movzx on short
- info->zeroExtend = true;
- info->operandSize = 2;
- break;
-
- case 0x0FBE: // movsx on byte
- info->signExtend = true;
- info->operandSize = 1;
- break;
-
- case 0x0FBF: // movsx on short
- info->signExtend = true;
- info->operandSize = 2;
- break;
-
- case 0x0F38F0: // movbe read
- info->byteSwap = true;
- break;
-
- case 0x0F38F1: // movbe write
- info->byteSwap = true;
- info->isMemoryWrite = true;
- break;
-
- default:
- return false;
- }
- info->instructionSize = (int)(codePtr - startCodePtr);
- return true;
-}
-
-bool InstructionInfo::operator==(const InstructionInfo& other) const
-{
- return operandSize == other.operandSize && instructionSize == other.instructionSize &&
- regOperandReg == other.regOperandReg && otherReg == other.otherReg &&
- scaledReg == other.scaledReg && zeroExtend == other.zeroExtend &&
- signExtend == other.signExtend && hasImmediate == other.hasImmediate &&
- isMemoryWrite == other.isMemoryWrite && byteSwap == other.byteSwap &&
- immediate == other.immediate && displacement == other.displacement;
-}
diff --git a/Source/Core/Common/x64Analyzer.h b/Source/Core/Common/x64Analyzer.h
deleted file mode 100644
index de21f6ff8f..0000000000
--- a/Source/Core/Common/x64Analyzer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "Common/CommonTypes.h"
-
-struct InstructionInfo
-{
- int operandSize; // 8, 16, 32, 64
- int instructionSize;
- int regOperandReg;
- int otherReg;
- int scaledReg;
- bool zeroExtend;
- bool signExtend;
- bool hasImmediate;
- bool isMemoryWrite;
- bool byteSwap;
- u64 immediate;
- s32 displacement;
-
- bool operator==(const InstructionInfo& other) const;
-};
-
-struct ModRM
-{
- int mod, reg, rm;
- ModRM(u8 modRM, u8 rex)
- {
- mod = modRM >> 6;
- reg = ((modRM >> 3) & 7) | ((rex & 4) ? 8 : 0);
- rm = modRM & 7;
- }
-};
-
-enum AccessType
-{
- OP_ACCESS_READ = 0,
- OP_ACCESS_WRITE = 1
-};
-
-bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info);
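The deleted analyzer decoded the faulting MOV at exception time: prefixes, the ModRM/SIB bytes, displacement and immediate. The rest of this patch makes that unnecessary by recording everything at emit time instead. For reference, a small standalone sketch of the mod/reg/rm split the deleted ModRM helper performed (ModRMFields/DecodeModRM are hypothetical names, not part of the patch; as in the deleted code, rm stays 3 bits here because REX.B/REX.X were applied on the SIB path):

#include <cstdint>
#include <cstdio>

// Standalone mirror of the deleted ModRM helper. REX.R widens the reg field.
struct ModRMFields
{
  int mod, reg, rm;
};

static ModRMFields DecodeModRM(uint8_t modrm, uint8_t rex)
{
  ModRMFields f;
  f.mod = modrm >> 6;                                // 0-2: memory forms, 3: register direct
  f.reg = ((modrm >> 3) & 7) | ((rex & 4) ? 8 : 0);  // REX.R selects r8-r15
  f.rm = modrm & 7;                                  // REX.B handled on the SIB path
  return f;
}

int main()
{
  // 0x43 with REX.WR (0x4C): mod=1 (disp8 follows), reg=8, rm=3
  ModRMFields f = DecodeModRM(0x43, 0x4C);
  std::printf("mod=%d reg=%d rm=%d\n", f.mod, f.reg, f.rm);
  return 0;
}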
diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 841d90a431..8ad1d46e40 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -1046,8 +1046,14 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src)
WriteMOVBE(bits, 0xF1, src, dest);
}
-void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend)
+void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend, MovInfo* info)
{
+ if (info)
+ {
+ info->address = GetWritableCodePtr();
+ info->nonAtomicSwapStore = false;
+ }
+
switch (size)
{
case 8:
@@ -1083,20 +1089,28 @@ void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_ext
}
}
-u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
+void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info)
{
- u8* mov_location = GetWritableCodePtr();
if (cpu_info.bMOVBE)
{
+ if (info)
+ {
+ info->address = GetWritableCodePtr();
+ info->nonAtomicSwapStore = false;
+ }
MOVBE(size, dst, src);
}
else
{
BSWAP(size, src);
- mov_location = GetWritableCodePtr();
+ if (info)
+ {
+ info->address = GetWritableCodePtr();
+ info->nonAtomicSwapStore = true;
+ info->nonAtomicSwapStoreSrc = src;
+ }
MOV(size, dst, R(src));
}
- return mov_location;
}
void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
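LoadAndSwap and SwapAndStore now report, through MovInfo, where the emitted MOV starts and, on the BSWAP fallback, which register was swapped in place. The distinction matters because MOVBE swaps inside the store and leaves the source register intact, while the fallback byte-swaps the register itself before the MOV; if that MOV then faults, the register holds a swapped value the backpatcher must restore. A minimal plain-C++ illustration of the two behaviours (no emitter involved; bswap32 is a portable stand-in for Common::swap32):

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t bswap32(uint32_t v)
{
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main()
{
  uint8_t mem[4];
  uint32_t src = 0x11223344u;

  // MOVBE-style store: the swap happens as part of the store, src is untouched.
  uint32_t swapped = bswap32(src);
  std::memcpy(mem, &swapped, sizeof(swapped));
  std::printf("movbe path:    src = %08x\n", src);

  // BSWAP + MOV fallback: the source register itself is swapped before the MOV.
  // If that MOV faults, src is left byte-swapped; recording it as
  // nonAtomicSwapStoreSrc lets the backpatcher swap it back (see JitBackpatch.cpp).
  src = bswap32(src);
  std::memcpy(mem, &src, sizeof(src));
  std::printf("fallback path: src = %08x\n", src);
  return 0;
}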
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index b939e79cd5..73f1d69721 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -203,6 +203,15 @@ enum FloatOp
class XEmitter;
+// Information about a generated MOV op
+struct MovInfo final
+{
+ u8* address;
+ bool nonAtomicSwapStore;
+ // valid iff nonAtomicSwapStore is true
+ X64Reg nonAtomicSwapStoreSrc;
+};
+
// RIP addressing does not benefit from micro op fusion on Core arch
struct OpArg
{
@@ -272,6 +281,27 @@ struct OpArg
return (s8)offset;
}
+ OpArg AsImm64() const
+ {
+ _dbg_assert_(DYNA_REC, IsImm());
+ return OpArg((u64)offset, SCALE_IMM64);
+ }
+ OpArg AsImm32() const
+ {
+ _dbg_assert_(DYNA_REC, IsImm());
+ return OpArg((u32)offset, SCALE_IMM32);
+ }
+ OpArg AsImm16() const
+ {
+ _dbg_assert_(DYNA_REC, IsImm());
+ return OpArg((u16)offset, SCALE_IMM16);
+ }
+ OpArg AsImm8() const
+ {
+ _dbg_assert_(DYNA_REC, IsImm());
+ return OpArg((u8)offset, SCALE_IMM8);
+ }
+
void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const;
bool IsImm() const
{
@@ -625,8 +655,9 @@ public:
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
void MOVBE(int bits, X64Reg dest, const OpArg& src);
void MOVBE(int bits, const OpArg& dest, X64Reg src);
- void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false);
- u8* SwapAndStore(int size, const OpArg& dst, X64Reg src);
+ void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false,
+ MovInfo* info = nullptr);
+ void SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info = nullptr);
// Available only on AMD >= Phenom or Intel >= Haswell
void LZCNT(int bits, X64Reg dest, const OpArg& src);
diff --git a/Source/Core/Core/MemTools.cpp b/Source/Core/Core/MemTools.cpp
index 0788e8f176..a54cd0cfab 100644
--- a/Source/Core/Core/MemTools.cpp
+++ b/Source/Core/Core/MemTools.cpp
@@ -8,7 +8,6 @@
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/Thread.h"
-#include "Common/x64Analyzer.h"
#include "Core/HW/Memmap.h"
#include "Core/MachineContext.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 990bbd6e17..b00b63845b 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -19,6 +19,7 @@
#pragma once
#include "Common/CommonTypes.h"
+#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 66ad4da6f1..b28297df0a 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -287,17 +287,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress)
- {
- MemoryExceptionCheck();
MOV(32, gpr.R(a), opAddress);
- }
// TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed)
- {
- MemoryExceptionCheck();
BSWAP(accessSize, gpr.RX(d));
- }
gpr.UnlockAll();
gpr.UnlockAllX();
@@ -507,10 +501,7 @@ void Jit64::stX(UGeckoInstruction inst)
}
if (update)
- {
- MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)offset));
- }
}
gpr.UnlockAll();
}
@@ -589,10 +580,7 @@ void Jit64::stXx(UGeckoInstruction inst)
}
if (update)
- {
- MemoryExceptionCheck();
MOV(32, gpr.R(a), R(RSCRATCH2));
- }
gpr.UnlockAll();
gpr.UnlockAllX();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index 6dbf8b14c4..aba308c458 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -80,7 +80,6 @@ void Jit64::lfXXX(UGeckoInstruction inst)
registersInUse[RSCRATCH2] = true;
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
- MemoryExceptionCheck();
if (single)
{
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
@@ -193,10 +192,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update)
- {
- MemoryExceptionCheck();
MOV(32, gpr.R(a), R(RSCRATCH2));
- }
fpr.UnlockAll();
gpr.UnlockAll();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index a1d1a223c9..0784fa1f77 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -40,74 +40,6 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
gpr.Lock(a, b);
- if (gqrIsConstant && gqrValue == 0)
- {
- int storeOffset = 0;
- gpr.BindToRegister(a, true, update);
- X64Reg addr = gpr.RX(a);
- // TODO: this is kind of ugly :/ we should probably create a universal load/store address
- // calculation
- // function that handles all these weird cases, e.g. how non-fastmem loadstores clobber
- // addresses.
- bool storeAddress = (update && jo.memcheck) || !jo.fastmem;
- if (storeAddress)
- {
- addr = RSCRATCH2;
- MOV(32, R(addr), gpr.R(a));
- }
- if (indexed)
- {
- if (update)
- {
- ADD(32, R(addr), gpr.R(b));
- }
- else
- {
- addr = RSCRATCH2;
- if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
- {
- LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
- }
- else
- {
- MOV(32, R(addr), gpr.R(b));
- if (a)
- ADD(32, R(addr), gpr.R(a));
- }
- }
- }
- else
- {
- if (update)
- ADD(32, R(addr), Imm32(offset));
- else
- storeOffset = offset;
- }
-
- fpr.Lock(s);
- if (w)
- {
- CVTSD2SS(XMM0, fpr.R(s));
- MOVD_xmm(R(RSCRATCH), XMM0);
- }
- else
- {
- CVTPD2PS(XMM0, fpr.R(s));
- MOVQ_xmm(R(RSCRATCH), XMM0);
- ROL(64, R(RSCRATCH), Imm8(32));
- }
-
- BitSet32 registersInUse = CallerSavedRegistersInUse();
- if (update && storeAddress)
- registersInUse[addr] = true;
- SafeWriteRegToReg(RSCRATCH, addr, w ? 32 : 64, storeOffset, registersInUse);
- MemoryExceptionCheck();
- if (update && storeAddress)
- MOV(32, gpr.R(a), R(addr));
- gpr.UnlockAll();
- fpr.UnlockAll();
- return;
- }
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
@@ -130,44 +62,35 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
+ if (w)
+ CVTSD2SS(XMM0, fpr.R(s)); // one
+ else
+ CVTPD2PS(XMM0, fpr.R(s)); // pair
+
if (gqrIsConstant)
{
-// Paired stores don't yield any real change in performance right now, but if we can
-// improve fastmem support this might change
-//#define INLINE_PAIRED_STORES
-#ifdef INLINE_PAIRED_STORES
- if (w)
- {
- // One value
- CVTSD2SS(XMM0, fpr.R(s));
- GenQuantizedStore(true, static_cast<EQuantizeType>(gqrValue & 0x7), (gqrValue & 0x3F00) >> 8);
- }
- else
- {
- // Pair of values
- CVTPD2PS(XMM0, fpr.R(s));
- GenQuantizedStore(false, static_cast<EQuantizeType>(gqrValue & 0x7),
- (gqrValue & 0x3F00) >> 8);
- }
-#else
- // We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
- // with just the scale bits.
int type = gqrValue & 0x7;
- MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
- if (w)
+ // Paired stores (other than w/type zero) don't yield any real change in
+ // performance right now, but if we can improve fastmem support this might change
+ if (gqrValue == 0)
{
- // One value
- CVTSD2SS(XMM0, fpr.R(s));
- CALL(asm_routines.singleStoreQuantized[type]);
+ if (w)
+ GenQuantizedStore(true, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
+ else
+ GenQuantizedStore(false, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
}
else
{
- // Pair of values
- CVTPD2PS(XMM0, fpr.R(s));
- CALL(asm_routines.pairedStoreQuantized[type]);
+ // We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
+ // with just the scale bits.
+ MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
+
+ if (w)
+ CALL(asm_routines.singleStoreQuantized[type]);
+ else
+ CALL(asm_routines.pairedStoreQuantized[type]);
}
-#endif
}
else
{
@@ -180,22 +103,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
if (w)
- {
- // One value
- CVTSD2SS(XMM0, fpr.R(s));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
- }
else
- {
- // Pair of values
- CVTPD2PS(XMM0, fpr.R(s));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
- }
}
if (update && jo.memcheck)
{
- MemoryExceptionCheck();
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
@@ -226,113 +140,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(a, b);
- if (gqrIsConstant && gqrValue == 0)
- {
- s32 loadOffset = 0;
- gpr.BindToRegister(a, true, update);
- X64Reg addr = gpr.RX(a);
- if (update && jo.memcheck)
- {
- addr = RSCRATCH2;
- MOV(32, R(addr), gpr.R(a));
- }
- if (indexed)
- {
- if (update)
- {
- ADD(32, R(addr), gpr.R(b));
- }
- else
- {
- addr = RSCRATCH2;
- if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
- {
- LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
- }
- else
- {
- MOV(32, R(addr), gpr.R(b));
- if (a)
- ADD(32, R(addr), gpr.R(a));
- }
- }
- }
- else
- {
- if (update)
- ADD(32, R(addr), Imm32(offset));
- else
- loadOffset = offset;
- }
-
- fpr.Lock(s);
- if (jo.memcheck)
- {
- fpr.StoreFromRegister(s);
- js.revertFprLoad = s;
- }
- fpr.BindToRegister(s, false);
-
- // Let's mirror the JitAsmCommon code and assume all non-MMU loads go to RAM.
- if (!jo.memcheck)
- {
- if (w)
- {
- if (cpu_info.bSSSE3)
- {
- MOVD_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
- PSHUFB(XMM0, M(pbswapShuffle1x4));
- UNPCKLPS(XMM0, M(m_one));
- }
- else
- {
- LoadAndSwap(32, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
- MOVD_xmm(XMM0, R(RSCRATCH));
- UNPCKLPS(XMM0, M(m_one));
- }
- }
- else
- {
- if (cpu_info.bSSSE3)
- {
- MOVQ_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
- PSHUFB(XMM0, M(pbswapShuffle2x4));
- }
- else
- {
- LoadAndSwap(64, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
- ROL(64, R(RSCRATCH), Imm8(32));
- MOVQ_xmm(XMM0, R(RSCRATCH));
- }
- }
- CVTPS2PD(fpr.RX(s), R(XMM0));
- }
- else
- {
- BitSet32 registersInUse = CallerSavedRegistersInUse();
- registersInUse[fpr.RX(s) << 16] = false;
- if (update)
- registersInUse[addr] = true;
- SafeLoadToReg(RSCRATCH, R(addr), w ? 32 : 64, loadOffset, registersInUse, false);
- MemoryExceptionCheck();
- if (w)
- {
- MOVD_xmm(XMM0, R(RSCRATCH));
- UNPCKLPS(XMM0, M(m_one));
- }
- else
- {
- ROL(64, R(RSCRATCH), Imm8(32));
- MOVQ_xmm(XMM0, R(RSCRATCH));
- }
- CVTPS2PD(fpr.RX(s), R(XMM0));
- if (update)
- MOV(32, gpr.R(a), R(addr));
- }
- gpr.UnlockAll();
- fpr.UnlockAll();
- return;
- }
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
@@ -373,7 +180,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
}
- MemoryExceptionCheck();
CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && jo.memcheck)
{
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 38d0e4a6e5..1dacdca430 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -572,8 +572,6 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
MULPS(XMM0, R(XMM1));
}
}
-
- return;
}
void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
index ac9950883e..90f0f95a49 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
@@ -17,6 +17,7 @@
#pragma once
#include "Common/CommonTypes.h"
+#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
index 33efc2ee18..83119d2189 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
@@ -12,27 +12,12 @@
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
-#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
using namespace Gen;
-static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
-{
- u64 code_addr = (u64)codePtr;
- disassembler disasm;
- char disbuf[256];
- memset(disbuf, 0, 256);
- disasm.disasm64(0, code_addr, codePtr, disbuf);
- PanicAlert("%s\n\n"
- "Error encountered accessing emulated address %08x.\n"
- "Culprit instruction: \n%s\nat %#" PRIx64,
- text.c_str(), emAddress, disbuf, code_addr);
- return;
-}
-
// This generates some fairly heavy trampolines, but it doesn't really hurt.
// Only instructions that access I/O will get these, and there won't be that
// many of them in a typical program/game.
@@ -56,36 +41,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
if (!IsInSpace(codePtr))
return false; // this will become a regular crash real soon after this
- InstructionInfo info = {};
-
- if (!DisassembleMov(codePtr, &info))
- {
- BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
- return false;
- }
-
- if (info.otherReg != RMEM)
- {
- PanicAlert("BackPatch : Base reg not RMEM."
- "\n\nAttempted to access %08x.",
- emAddress);
- return false;
- }
-
- if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
- {
- PanicAlert("BackPatch: MOVBE is too small");
- return false;
- }
-
- auto it = registersInUseAtLoc.find(codePtr);
- if (it == registersInUseAtLoc.end())
+ auto it = backPatchInfo.find(codePtr);
+ if (it == backPatchInfo.end())
{
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
return false;
}
- BitSet32 registersInUse = it->second;
+ TrampolineInfo& info = it->second;
u8* exceptionHandler = nullptr;
if (jit->jo.memcheck)
@@ -95,110 +58,67 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
exceptionHandler = it2->second;
}
- // Compute the start and length of the memory operation, including
- // any byteswapping.
- int totalSize = info.instructionSize;
- u8* start = codePtr;
- if (!info.isMemoryWrite)
- {
- // MOVBE and single bytes don't need to be swapped.
- if (!info.byteSwap && info.operandSize > 1)
- {
- // REX
- if ((codePtr[totalSize] & 0xF0) == 0x40)
- totalSize++;
-
- // BSWAP
- if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
- totalSize += 2;
-
- if (info.operandSize == 2)
- {
- // operand size override
- if (codePtr[totalSize] == 0x66)
- totalSize++;
- // REX
- if ((codePtr[totalSize] & 0xF0) == 0x40)
- totalSize++;
- // SAR/ROL
- _assert_(codePtr[totalSize] == 0xC1 &&
- (codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
- info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
- totalSize += 3;
- }
- }
- }
- else
- {
- if (info.byteSwap || info.hasImmediate)
- {
- // The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
- }
- else
- {
- // We entered here with a BSWAP-ed register. We'll have to swap it back.
- u64* ptr = ContextRN(ctx, info.regOperandReg);
- int bswapSize = 0;
- switch (info.operandSize)
- {
- case 1:
- bswapSize = 0;
- break;
- case 2:
- bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
- *ptr = Common::swap16((u16)*ptr);
- break;
- case 4:
- bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
- *ptr = Common::swap32((u32)*ptr);
- break;
- case 8:
- bswapSize = 3;
- *ptr = Common::swap64(*ptr);
- break;
- }
- start = codePtr - bswapSize;
- totalSize += bswapSize;
- }
- }
-
// In the trampoline code, we jump back into the block at the beginning
// of the next instruction. The next instruction comes immediately
// after the backpatched operation, or BACKPATCH_SIZE bytes after the start
// of the backpatched operation, whichever comes last. (The JIT inserts NOPs
// into the original code if necessary to ensure there is enough space
// to insert the backpatch jump.)
- int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
- u8* returnPtr = start + 5 + padding;
+
+ jit->js.generatingTrampoline = true;
+ jit->js.trampolineExceptionHandler = exceptionHandler;
// Generate the trampoline.
- const u8* trampoline;
- if (info.isMemoryWrite)
- {
- // TODO: special case FIFO writes.
- auto it3 = pcAtLoc.find(codePtr);
- if (it3 == pcAtLoc.end())
- {
- PanicAlert("BackPatch: no pc entry for address %p", codePtr);
- return false;
- }
+ const u8* trampoline = trampolines.GenerateTrampoline(info);
+ jit->js.generatingTrampoline = false;
+ jit->js.trampolineExceptionHandler = nullptr;
- u32 pc = it3->second;
- trampoline =
- trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
- }
- else
- {
- trampoline =
- trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
- }
+ u8* start = info.start;
// Patch the original memory operation.
XEmitter emitter(start);
emitter.JMP(trampoline, true);
- for (int i = 0; i < padding; ++i)
+ // NOPs become dead code
+ const u8* end = info.start + info.len;
+ for (const u8* i = emitter.GetCodePtr(); i < end; ++i)
emitter.INT3();
- ctx->CTX_PC = (u64)start;
+
+ // Rewind time to just before the start of the write block. If we swapped memory
+ // before faulting (eg: the store+swap was not an atomic op like MOVBE), let's
+ // swap it back so that the swap can happen again (this double swap isn't ideal but
+ // only happens the first time we fault).
+ if (info.nonAtomicSwapStoreSrc != INVALID_REG)
+ {
+ u64* ptr = ContextRN(ctx, info.nonAtomicSwapStoreSrc);
+ switch (info.accessSize << 3)
+ {
+ case 8:
+ // No need to swap a byte
+ break;
+ case 16:
+ *ptr = Common::swap16(static_cast<u16>(*ptr));
+ break;
+ case 32:
+ *ptr = Common::swap32(static_cast<u32>(*ptr));
+ break;
+ case 64:
+ *ptr = Common::swap64(static_cast<u64>(*ptr));
+ break;
+ default:
+ _dbg_assert_(DYNA_REC, 0);
+ break;
+ }
+ }
+
+ // This is special code to undo the LEA in SafeLoadToReg if it clobbered the address
+ // register in the case where reg_value shared the same location as opAddress.
+ if (info.offsetAddedToAddress)
+ {
+ u64* ptr = ContextRN(ctx, info.op_arg.GetSimpleReg());
+ *ptr -= static_cast<u32>(info.offset);
+ }
+
+ ctx->CTX_PC = reinterpret_cast<u64>(trampoline);
return true;
}
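The new BackPatch path is: look up the TrampolineInfo recorded at emit time, generate a trampoline that redoes the access through the slow path, overwrite the patchable region with a JMP to it (filling the remainder with INT3), and fix up the saved register state before resuming at the trampoline. The fix-up step is the subtle part; a self-contained sketch of it, assuming a plain array stands in for the saved context and an index of -1 means no swap to undo (FixupInfo, UndoPartialState and the swap helpers are hypothetical stand-ins, not Dolphin's API):

#include <cstdint>
#include <cstdio>

// Portable stand-ins for Common::swap16/swap32/swap64.
static uint16_t swap16(uint16_t v) { return static_cast<uint16_t>((v << 8) | (v >> 8)); }
static uint32_t swap32(uint32_t v)
{
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}
static uint64_t swap64(uint64_t v)
{
  return (static_cast<uint64_t>(swap32(static_cast<uint32_t>(v))) << 32) |
         swap32(static_cast<uint32_t>(v >> 32));
}

// Trimmed-down view of the TrampolineInfo fields the fix-up needs.
struct FixupInfo
{
  int nonAtomicSwapStoreSrc;  // context slot of the swapped register, -1 if none
  int accessSize;             // in bytes, as in TrampolineInfo
  bool offsetAddedToAddress;  // SafeLoadToReg folded the offset in with LEA
  int addrReg;                // context slot of the address register
  int32_t offset;
};

// Mirrors the fix-up at the end of Jitx86Base::BackPatch, with ctx_regs standing
// in for ContextRN(ctx, reg).
static void UndoPartialState(uint64_t* ctx_regs, const FixupInfo& info)
{
  if (info.nonAtomicSwapStoreSrc >= 0)
  {
    uint64_t& r = ctx_regs[info.nonAtomicSwapStoreSrc];
    switch (info.accessSize * 8)
    {
    case 8:  break;  // a single byte never needed swapping
    case 16: r = swap16(static_cast<uint16_t>(r)); break;
    case 32: r = swap32(static_cast<uint32_t>(r)); break;
    case 64: r = swap64(r); break;
    }
  }
  if (info.offsetAddedToAddress)
    ctx_regs[info.addrReg] -= static_cast<uint32_t>(info.offset);
}

int main()
{
  uint64_t regs[16] = {};
  regs[3] = swap32(0xCAFEBABEu);  // pretend reg 3 was byte-swapped before the faulting MOV
  FixupInfo info{3, 4, false, 0, 0};
  UndoPartialState(regs, info);
  std::printf("restored reg 3 = %08llx\n", static_cast<unsigned long long>(regs[3]));
  return 0;
}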
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index 245f73df66..af69116fd6 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -96,6 +96,9 @@ protected:
bool carryFlagSet;
bool carryFlagInverted;
+ bool generatingTrampoline;
+ u8* trampolineExceptionHandler;
+
int fifoBytesThisBlock;
PPCAnalyst::BlockStats st;
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index ef4d5f3eef..fb0998f284 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -18,6 +18,26 @@ using namespace Gen;
void EmuCodeBlock::MemoryExceptionCheck()
{
+ // TODO: We really should untangle the trampolines, exception handlers and
+ // memory checks.
+
+ // If we are currently generating a trampoline for a failed fastmem
+ // load/store, the trampoline generator will have stashed the exception
+ // handler (that we previously generated after the fastmem instruction) in
+ // trampolineExceptionHandler.
+ if (jit->js.generatingTrampoline)
+ {
+ if (jit->js.trampolineExceptionHandler)
+ {
+ TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
+ J_CC(CC_NZ, jit->js.trampolineExceptionHandler);
+ }
+ return;
+ }
+
+ // If memcheck (ie: MMU) mode is enabled and we haven't generated an
+ // exception handler for this instruction yet, we will generate an
+ // exception check.
if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
@@ -42,10 +62,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
}
-u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
- bool signExtend)
+bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
+ bool signExtend, MovInfo* info)
{
- u8* result;
+ bool offsetAddedToAddress = false;
OpArg memOperand;
if (opAddress.IsSimpleReg())
{
@@ -57,6 +77,11 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
// place to address the issue.)
if ((u32)offset >= 0x1000)
{
+ // This method can potentially clobber the address if it shares a register
+ // with the load target. In this case we can just subtract offset from the
+ // register (see JitBackpatch for this implementation).
+ offsetAddedToAddress = (reg_value == opAddress.GetSimpleReg());
+
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
opAddress = R(reg_value);
offset = 0;
@@ -74,9 +99,8 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
}
- result = GetWritableCodePtr();
- LoadAndSwap(accessSize, reg_value, memOperand, signExtend);
- return result;
+ LoadAndSwap(accessSize, reg_value, memOperand, signExtend, info);
+ return offsetAddedToAddress;
}
// Visitor that generates code to read a MMIO value.
@@ -231,72 +255,43 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
{
- registersInUse[reg_value] = false;
- if (jit->jo.fastmem && !opAddress.IsImm() &&
- !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)))
- {
- u8* mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
+ bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
- registersInUseAtLoc[mov] = registersInUse;
- jit->js.fastmemLoadStore = mov;
+ registersInUse[reg_value] = false;
+ if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
+ {
+ u8* backpatchStart = GetWritableCodePtr();
+ MovInfo mov;
+ bool offsetAddedToAddress =
+ UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov);
+ TrampolineInfo& info = backPatchInfo[mov.address];
+ info.pc = jit->js.compilerPC;
+ info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
+ info.start = backpatchStart;
+ info.read = true;
+ info.op_reg = reg_value;
+ info.op_arg = opAddress;
+ info.offsetAddedToAddress = offsetAddedToAddress;
+ info.accessSize = accessSize >> 3;
+ info.offset = offset;
+ info.registersInUse = registersInUse;
+ info.flags = flags;
+ info.signExtend = signExtend;
+ ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
+ if (padding > 0)
+ {
+ NOP(padding);
+ }
+ info.len = static_cast<u32>(GetCodePtr() - info.start);
+
+ jit->js.fastmemLoadStore = mov.address;
return;
}
- u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-
- // The following masks the region used by the GC/Wii virtual memory lib
- mem_mask |= Memory::ADDR_MASK_MEM1;
-
if (opAddress.IsImm())
{
u32 address = opAddress.Imm32() + offset;
-
- // If the address is known to be RAM, just load it directly.
- if (PowerPC::IsOptimizableRAMAddress(address))
- {
- UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
- return;
- }
-
- // If the address maps to an MMIO register, inline MMIO read code.
- u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
- if (accessSize != 64 && mmioAddress)
- {
- MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
- signExtend);
- return;
- }
-
- // Fall back to general-case code.
- ABI_PushRegistersAndAdjustStack(registersInUse, 0);
- switch (accessSize)
- {
- case 64:
- ABI_CallFunctionC((void*)&PowerPC::Read_U64, address);
- break;
- case 32:
- ABI_CallFunctionC((void*)&PowerPC::Read_U32, address);
- break;
- case 16:
- ABI_CallFunctionC((void*)&PowerPC::Read_U16_ZX, address);
- break;
- case 8:
- ABI_CallFunctionC((void*)&PowerPC::Read_U8_ZX, address);
- break;
- }
- ABI_PopRegistersAndAdjustStack(registersInUse, 0);
-
- MemoryExceptionCheck();
- if (signExtend && accessSize < 32)
- {
- // Need to sign extend values coming from the Read_U* functions.
- MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
- }
- else if (reg_value != ABI_RETURN)
- {
- MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
- }
-
+ SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend);
return;
}
@@ -310,8 +305,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
}
FixupBranch exit;
- if (!jit->jo.alwaysUseMemFuncs)
+ if (!jit->jo.alwaysUseMemFuncs && !slowmem)
{
+ u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
+
+ // The following masks the region used by the GC/Wii virtual memory lib
+ mem_mask |= Memory::ADDR_MASK_MEM1;
+
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
if (farcode.Enabled())
@@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
}
- if (!jit->jo.alwaysUseMemFuncs)
+ if (!jit->jo.alwaysUseMemFuncs && !slowmem)
{
if (farcode.Enabled())
{
@@ -361,6 +361,56 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
}
}
+void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
+ BitSet32 registersInUse, bool signExtend)
+{
+ // If the address is known to be RAM, just load it directly.
+ if (PowerPC::IsOptimizableRAMAddress(address))
+ {
+ UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
+ return;
+ }
+
+ // If the address maps to an MMIO register, inline MMIO read code.
+ u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
+ if (accessSize != 64 && mmioAddress)
+ {
+ MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
+ signExtend);
+ return;
+ }
+
+ // Fall back to general-case code.
+ ABI_PushRegistersAndAdjustStack(registersInUse, 0);
+ switch (accessSize)
+ {
+ case 64:
+ ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U64), address);
+ break;
+ case 32:
+ ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U32), address);
+ break;
+ case 16:
+ ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U16_ZX), address);
+ break;
+ case 8:
+ ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U8_ZX), address);
+ break;
+ }
+ ABI_PopRegistersAndAdjustStack(registersInUse, 0);
+
+ MemoryExceptionCheck();
+ if (signExtend && accessSize < 32)
+ {
+ // Need to sign extend values coming from the Read_U* functions.
+ MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
+ }
+ else if (reg_value != ABI_RETURN)
+ {
+ MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
+ }
+}
+
static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
{
if (accessSize == 32)
@@ -371,10 +421,15 @@ static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
return Imm8(reg_value.Imm8());
}
-u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
- bool swap)
+void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
+ bool swap, MovInfo* info)
{
- u8* result = GetWritableCodePtr();
+ if (info)
+ {
+ info->address = GetWritableCodePtr();
+ info->nonAtomicSwapStore = false;
+ }
+
OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
if (reg_value.IsImm())
{
@@ -384,22 +439,19 @@ u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
}
else if (swap)
{
- result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg());
+ SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
}
else
{
MOV(accessSize, dest, reg_value);
}
-
- return result;
}
static OpArg FixImmediate(int accessSize, OpArg arg)
{
if (arg.IsImm())
{
- arg = accessSize == 8 ? Imm8((u8)arg.Imm32()) : accessSize == 16 ? Imm16((u16)arg.Imm32()) :
- Imm32((u32)arg.Imm32());
+ arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
}
return arg;
}
@@ -475,25 +527,38 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
BitSet32 registersInUse, int flags)
{
+ bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
+ bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+
// set the correct immediate format
reg_value = FixImmediate(accessSize, reg_value);
- // TODO: support byte-swapped non-immediate fastmem stores
- if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
- (reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)))
+ if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
{
- const u8* backpatchStart = GetCodePtr();
- u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset,
- !(flags & SAFE_LOADSTORE_NO_SWAP));
+ u8* backpatchStart = GetWritableCodePtr();
+ MovInfo mov;
+ UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov);
+ TrampolineInfo& info = backPatchInfo[mov.address];
+ info.pc = jit->js.compilerPC;
+ info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
+ info.start = backpatchStart;
+ info.read = false;
+ info.op_arg = reg_value;
+ info.op_reg = reg_addr;
+ info.offsetAddedToAddress = false;
+ info.accessSize = accessSize >> 3;
+ info.offset = offset;
+ info.registersInUse = registersInUse;
+ info.flags = flags;
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
if (padding > 0)
{
NOP(padding);
}
+ info.len = static_cast<u32>(GetCodePtr() - info.start);
+
+ jit->js.fastmemLoadStore = mov.address;
- registersInUseAtLoc[mov] = registersInUse;
- pcAtLoc[mov] = jit->js.compilerPC;
- jit->js.fastmemLoadStore = mov;
return;
}
@@ -510,21 +575,22 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
}
}
- u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-
- // The following masks the region used by the GC/Wii virtual memory lib
- mem_mask |= Memory::ADDR_MASK_MEM1;
-
- bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
-
FixupBranch slow, exit;
- slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
- UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
- if (farcode.Enabled())
- SwitchToFarCode();
- else
- exit = J(true);
- SetJumpTarget(slow);
+ if (!slowmem)
+ {
+ u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
+
+ // The following masks the region used by the GC/Wii virtual memory lib
+ mem_mask |= Memory::ADDR_MASK_MEM1;
+
+ slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
+ UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
+ if (farcode.Enabled())
+ SwitchToFarCode();
+ else
+ exit = J(true);
+ SetJumpTarget(slow);
+ }
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
@@ -563,12 +629,18 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
- if (farcode.Enabled())
+
+ MemoryExceptionCheck();
+
+ if (!slowmem)
{
- exit = J(true);
- SwitchToNearCode();
+ if (farcode.Enabled())
+ {
+ exit = J(true);
+ SwitchToNearCode();
+ }
+ SetJumpTarget(exit);
}
- SetJumpTarget(exit);
}
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
@@ -1055,7 +1127,6 @@ void EmuCodeBlock::JitClearCA()
void EmuCodeBlock::Clear()
{
- registersInUseAtLoc.clear();
- pcAtLoc.clear();
+ backPatchInfo.clear();
exceptionHandlerAtLoc.clear();
}
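SafeLoadToReg and SafeWriteRegToReg now share the same emit-time bookkeeping: remember where the patchable MOV begins, pad the region to BACKPATCH_SIZE with NOPs so a 5-byte JMP always fits, and file a record under the MOV's address so the fault handler can find it. A reduced sketch of that pattern, using a hypothetical FastmemRecord in place of TrampolineInfo and a plain byte buffer in place of the emitter:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <unordered_map>

// Hypothetical stand-in for TrampolineInfo, keyed by the address of the MOV.
struct FastmemRecord
{
  const uint8_t* start;  // first byte of the patchable region
  uint32_t len;          // start + len is the first byte after it
  uint32_t pc;           // guest PC of the load/store being compiled
};

static const int kBackpatchSize = 5;  // a 5-byte JMP rel32 must always fit

int main()
{
  std::unordered_map<const uint8_t*, FastmemRecord> back_patch_info;

  uint8_t code_buffer[32] = {};
  const uint8_t* backpatch_start = code_buffer;
  const uint8_t* mov_address = code_buffer;   // where the MOV itself begins
  const uint8_t* code_ptr = code_buffer + 3;  // pretend the MOV took 3 bytes

  // Pad with NOPs so a later JMP to the trampoline always fits, exactly as
  // SafeLoadToReg/SafeWriteRegToReg do after emitting the MOV.
  std::ptrdiff_t padding = kBackpatchSize - (code_ptr - backpatch_start);
  if (padding > 0)
    code_ptr += padding;  // the real code emits NOP(padding) here

  FastmemRecord& rec = back_patch_info[mov_address];
  rec.start = backpatch_start;
  rec.len = static_cast<uint32_t>(code_ptr - backpatch_start);
  rec.pc = 0x80003100;  // example guest PC, arbitrary

  // At fault time, BackPatch() finds the record by the faulting code address.
  auto it = back_patch_info.find(mov_address);
  if (it != back_patch_info.end())
    std::printf("patch %u bytes at %p for pc %08x\n",
                static_cast<unsigned>(it->second.len),
                static_cast<const void*>(it->second.start), it->second.pc);
  return 0;
}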
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index e50ad9af6e..54deb7e7a1 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -59,6 +59,47 @@ static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
+// Stores information we need to back-patch a MOV with a call to the slow read/write path after
+// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
+// big.
+struct TrampolineInfo final
+{
+ // The start of the store operation that failed -- we will patch a JMP here
+ u8* start;
+
+ // The start + len = end of the store operation (points to the next instruction)
+ u32 len;
+
+ // The PPC PC for the current load/store block
+ u32 pc;
+
+ // Saved because we need these to make the ABI call in the trampoline
+ BitSet32 registersInUse;
+
+ // The MOV operation
+ Gen::X64Reg nonAtomicSwapStoreSrc;
+
+ // src/dest for load/store
+ s32 offset;
+ Gen::X64Reg op_reg;
+ Gen::OpArg op_arg;
+
+ // Original SafeLoadXXX/SafeStoreXXX flags
+ u8 flags;
+
+ // Memory access size (in bytes)
+ u8 accessSize : 4;
+
+ // true if this is a read op vs a write
+ bool read : 1;
+
+ // for read operations, true if needs sign-extension after load
+ bool signExtend : 1;
+
+ // Set to true if we added the offset to the address and need to undo it
+ bool offsetAddedToAddress : 1;
+};
+
// Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::X64CodeBlock
{
@@ -88,15 +129,15 @@ public:
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset, bool signExtend = false);
// these return the address of the MOV, for backpatching
- u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
- s32 offset = 0, bool swap = true);
- u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
- s32 offset = 0, bool swap = true)
+ void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
+ s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
+ void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
+ s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
{
- return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
+ UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
}
- u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
- bool signExtend);
+ bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
+ bool signExtend, Gen::MovInfo* info = nullptr);
void UnsafeWriteGatherPipe(int accessSize);
// Generate a load/write from the MMIO handler for a given address. Only
@@ -108,12 +149,18 @@ public:
{
SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2,
+ // This indicates that the write being generated cannot be patched (and thus can't use fastmem)
SAFE_LOADSTORE_NO_FASTMEM = 4,
- SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
+ SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
+ // Force slowmem (used when generating fallbacks in trampolines)
+ SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
};
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
BitSet32 registersInUse, bool signExtend, int flags = 0);
+ void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize,
+ BitSet32 registersInUse, bool signExtend);
+
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
// reg_value if the load fails and js.memcheck is enabled.
// Works with immediate inputs and simple registers only.
@@ -158,7 +205,6 @@ public:
void Clear();
protected:
- std::unordered_map<u8*, BitSet32> registersInUseAtLoc;
- std::unordered_map<u8*, u32> pcAtLoc;
+ std::unordered_map<u8*, TrampolineInfo> backPatchInfo;
std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
};
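The header's comment about keeping TrampolineInfo small is worth taking literally, since one record is kept per fastmem MOV across all compiled blocks. A compile-time budget check on a simplified record shows the idea (CompactRecord is hypothetical and omits the real struct's Gen::OpArg and BitSet32 members; the static_assert is not in the patch, it only illustrates guarding the size):

#include <cstdint>

struct CompactRecord
{
  uint8_t* start;
  uint32_t len;
  uint32_t pc;
  int32_t offset;
  uint8_t flags;
  uint8_t accessSize : 4;        // bytes, fits in 4 bits (1/2/4/8)
  bool read : 1;
  bool signExtend : 1;
  bool offsetAddedToAddress : 1;
};

static_assert(sizeof(CompactRecord) <= 24, "keep per-MOV bookkeeping small");

int main()
{
  return 0;
}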
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
index 12b72b1035..79c0a5abee 100644
--- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
@@ -9,7 +9,6 @@
#include "Common/CommonTypes.h"
#include "Common/JitRegister.h"
#include "Common/x64ABI.h"
-#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
@@ -37,150 +36,50 @@ void TrampolineCache::Shutdown()
FreeCodeSpace();
}
-const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info,
- BitSet32 registersInUse, u8* exceptionHandler,
- u8* returnPtr)
+const u8* TrampolineCache::GenerateTrampoline(const TrampolineInfo& info)
+{
+ if (info.read)
+ {
+ return GenerateReadTrampoline(info);
+ }
+
+ return GenerateWriteTrampoline(info);
+}
+
+const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr();
- X64Reg addrReg = (X64Reg)info.scaledReg;
- X64Reg dataReg = (X64Reg)info.regOperandReg;
- int stack_offset = 0;
- bool push_param1 = registersInUse[ABI_PARAM1];
- if (push_param1)
- {
- PUSH(ABI_PARAM1);
- stack_offset = 8;
- registersInUse[ABI_PARAM1] = 0;
- }
+ SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
+ info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
- int dataRegSize = info.operandSize == 8 ? 64 : 32;
- if (addrReg != ABI_PARAM1 && info.displacement)
- LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
- else if (addrReg != ABI_PARAM1)
- MOV(32, R(ABI_PARAM1), R(addrReg));
- else if (info.displacement)
- ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
+ JMP(info.start + info.len, true);
- ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
-
- switch (info.operandSize)
- {
- case 8:
- CALL((void*)&PowerPC::Read_U64);
- break;
- case 4:
- CALL((void*)&PowerPC::Read_U32);
- break;
- case 2:
- CALL((void*)&PowerPC::Read_U16);
- break;
- case 1:
- CALL((void*)&PowerPC::Read_U8);
- break;
- }
-
- ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
-
- if (push_param1)
- POP(ABI_PARAM1);
-
- if (exceptionHandler)
- {
- TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
- J_CC(CC_NZ, exceptionHandler);
- }
-
- if (info.signExtend)
- MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
- else if (dataReg != ABI_RETURN || info.operandSize < 4)
- MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
-
- JMP(returnPtr, true);
-
- JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
+ JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_%x", info.pc);
return trampoline;
}
-const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info,
- BitSet32 registersInUse, u8* exceptionHandler,
- u8* returnPtr, u32 pc)
+const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr();
- X64Reg dataReg = (X64Reg)info.regOperandReg;
- X64Reg addrReg = (X64Reg)info.scaledReg;
-
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
- MOV(32, PPCSTATE(pc), Imm32(pc));
+ MOV(32, PPCSTATE(pc), Imm32(info.pc));
- ABI_PushRegistersAndAdjustStack(registersInUse, 0);
+ SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
+ info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
- if (info.hasImmediate)
- {
- if (addrReg != ABI_PARAM2 && info.displacement)
- LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
- else if (addrReg != ABI_PARAM2)
- MOV(32, R(ABI_PARAM2), R(addrReg));
- else if (info.displacement)
- ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
+ JMP(info.start + info.len, true);
- // we have to swap back the immediate to pass it to the write functions
- switch (info.operandSize)
- {
- case 8:
- PanicAlert("Invalid 64-bit immediate!");
- break;
- case 4:
- MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
- break;
- case 2:
- MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
- break;
- case 1:
- MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
- break;
- }
- }
- else
- {
- int dataRegSize = info.operandSize == 8 ? 64 : 32;
- MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
- }
-
- switch (info.operandSize)
- {
- case 8:
- CALL((void*)&PowerPC::Write_U64);
- break;
- case 4:
- CALL((void*)&PowerPC::Write_U32);
- break;
- case 2:
- CALL((void*)&PowerPC::Write_U16);
- break;
- case 1:
- CALL((void*)&PowerPC::Write_U8);
- break;
- }
-
- ABI_PopRegistersAndAdjustStack(registersInUse, 0);
- if (exceptionHandler)
- {
- TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
- J_CC(CC_NZ, exceptionHandler);
- }
- JMP(returnPtr, true);
-
- JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
+ JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", info.pc);
return trampoline;
}
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h
index 7852bae6b9..c43668dc8d 100644
--- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h
@@ -7,21 +7,21 @@
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
+#include "Core/PowerPC/JitCommon/Jit_Util.h"
struct InstructionInfo;
// We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5;
-class TrampolineCache : public Gen::X64CodeBlock
+class TrampolineCache : public EmuCodeBlock
{
+ const u8* GenerateReadTrampoline(const TrampolineInfo& info);
+ const u8* GenerateWriteTrampoline(const TrampolineInfo& info);
+
public:
void Init(int size);
void Shutdown();
-
- const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
- u8* exceptionHandler, u8* returnPtr);
- const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
- u8* exceptionHandler, u8* returnPtr, u32 pc);
+ const u8* GenerateTrampoline(const TrampolineInfo& info);
void ClearCodeSpace();
};