mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-27 06:35:39 +00:00
Fastmem writes for x86-64.
This commit is contained in:
parent
18abc33306
commit
2a339c926e
@ -411,31 +411,6 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
|
||||
ABI_RestoreStack(0);
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
|
||||
PUSH(RCX);
|
||||
PUSH(RDX);
|
||||
PUSH(RSI);
|
||||
PUSH(RDI);
|
||||
PUSH(R8);
|
||||
PUSH(R9);
|
||||
PUSH(R10);
|
||||
PUSH(R11);
|
||||
PUSH(R11);
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
|
||||
POP(R11);
|
||||
POP(R11);
|
||||
POP(R10);
|
||||
POP(R9);
|
||||
POP(R8);
|
||||
POP(RDI);
|
||||
POP(RSI);
|
||||
POP(RDX);
|
||||
POP(RCX);
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
// Win64 Specific Code
|
||||
|
||||
|
@ -153,6 +153,19 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
|
||||
}
|
||||
}
|
||||
|
||||
case 0x88: // mem <- r8
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
if (info->operandSize == 4)
|
||||
{
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
case 0x89: // mem <- r16/32/64
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
|
@ -646,12 +646,6 @@ public:
|
||||
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
|
||||
// A function that doesn't know anything about it's surroundings, should
|
||||
// be surrounded by these to establish a safe environment, where it can roam free.
|
||||
// An example is a backpatch injected function.
|
||||
void ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
void ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
|
||||
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
|
||||
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
|
||||
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
|
||||
|
@ -209,8 +209,6 @@ void Jit64::stfd(UGeckoInstruction inst)
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
|
||||
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
|
||||
|
||||
|
@ -1322,9 +1322,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||
int addr_scale = SCALE_8;
|
||||
#endif
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
||||
Jit->ABI_AlignStack(0);
|
||||
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
|
||||
Jit->ABI_RestoreStack(0);
|
||||
Jit->MOVAPD(reg, R(XMM0));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
@ -1429,9 +1427,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||
#endif
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
|
||||
Jit->ABI_AlignStack(0);
|
||||
Jit->CALLptr(MScaled(EDX, addr_scale, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
|
||||
Jit->ABI_RestoreStack(0);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
|
@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||
PACKSSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, false, true);
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||
MOV(16, R(AX), M((char*)psTemp + 4));
|
||||
|
||||
BSWAP(32, EAX);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
|
||||
// Easy!
|
||||
const u8* storeSingleFloat = AlignCode4();
|
||||
SafeWriteFloatToReg(XMM0, ECX);
|
||||
SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
|
||||
RET();
|
||||
/*
|
||||
if (cpu_info.bSSSE3) {
|
||||
@ -303,11 +303,11 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
// TODO: SafeWriteFloat
|
||||
MOVSS(M(&psTemp[0]), XMM0);
|
||||
MOV(32, R(EAX), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, false, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
} else {
|
||||
MOVSS(M(&psTemp[0]), XMM0);
|
||||
MOV(32, R(EAX), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, true, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
}*/
|
||||
|
||||
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
|
||||
@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_255));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleS8 = AlignCode4();
|
||||
@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
MAXSS(XMM0, M((void *)&m_m128));
|
||||
MINSS(XMM0, M((void *)&m_127));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, true, true);
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
|
||||
@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_65535));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleS16 = AlignCode4();
|
||||
@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||
MAXSS(XMM0, M((void *)&m_m32768));
|
||||
MINSS(XMM0, M((void *)&m_32767));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true);
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
|
@ -67,7 +67,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
|
||||
// It's a read. Easy.
|
||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
// It ought to be necessary to align the stack here. Since it seems to not
|
||||
// affect anybody, I'm not going to add it just to be completely safe about
|
||||
// performance.
|
||||
|
||||
if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
|
||||
if (info.displacement) {
|
||||
@ -87,8 +90,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
|
||||
break;
|
||||
}
|
||||
|
||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
|
||||
if (dataReg != EAX)
|
||||
{
|
||||
MOV(32, R(dataReg), R(EAX));
|
||||
@ -109,32 +110,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
|
||||
|
||||
#ifdef _M_X64
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
if (dataReg != EAX)
|
||||
PanicAlert("Backpatch write - not through EAX");
|
||||
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
|
||||
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
|
||||
// hardware access - we can take shortcuts.
|
||||
//if (emAddress == 0xCC008000)
|
||||
// PanicAlert("Caught a FIFO write");
|
||||
CMP(32, R(addrReg), Imm32(0xCC008000));
|
||||
FixupBranch skip_fast = J_CC(CC_NE, false);
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
CALL((void*)jit->GetAsmRoutines()->fifoDirectWrite32);
|
||||
RET();
|
||||
SetJumpTarget(skip_fast);
|
||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
// Don't treat FIFO writes specially for now because they require a burst
|
||||
// check anyway.
|
||||
|
||||
if (dataReg == ABI_PARAM2)
|
||||
PanicAlert("Incorrect use of SafeWriteRegToReg");
|
||||
if (addrReg != ABI_PARAM1)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
}
|
||||
|
||||
if (info.displacement)
|
||||
@ -142,13 +135,25 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
|
||||
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
||||
}
|
||||
|
||||
SUB(64, R(RSP), Imm8(8));
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
|
||||
break;
|
||||
case 4:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
|
||||
break;
|
||||
case 2:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
|
||||
break;
|
||||
case 1:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
|
||||
break;
|
||||
}
|
||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
|
||||
ADD(64, R(RSP), Imm8(8));
|
||||
RET();
|
||||
#endif
|
||||
|
||||
@ -193,21 +198,35 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
|
||||
}
|
||||
else
|
||||
{
|
||||
PanicAlert("BackPatch : Currently only supporting reads."
|
||||
"\n\nAttempted to write to %08x.", emAddress);
|
||||
|
||||
// TODO: special case FIFO writes. Also, support 32-bit mode.
|
||||
// Also, debug this so that it actually works correctly :P
|
||||
XEmitter emitter(codePtr - 2);
|
||||
// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
|
||||
// We entered here with a BSWAP-ed register. We'll have to swap it back.
|
||||
u64 *ptr = ContextRN(ctx, info.regOperandReg);
|
||||
int bswapSize = 0;
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 1:
|
||||
bswapSize = 0;
|
||||
break;
|
||||
case 2:
|
||||
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap16((u16) *ptr);
|
||||
break;
|
||||
case 4:
|
||||
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap32((u32) *ptr);
|
||||
break;
|
||||
case 8:
|
||||
bswapSize = 3;
|
||||
*ptr = Common::swap64(*ptr);
|
||||
break;
|
||||
}
|
||||
|
||||
u8 *start = codePtr - bswapSize;
|
||||
XEmitter emitter(start);
|
||||
const u8 *trampoline = trampolines.GetWriteTrampoline(info);
|
||||
emitter.CALL((void *)trampoline);
|
||||
emitter.NOP((int)info.instructionSize - 3);
|
||||
if (info.instructionSize < 3)
|
||||
PanicAlert("Instruction too small");
|
||||
// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
|
||||
ctx->CTX_RAX = Common::swap32((u32)ctx->CTX_RAX);
|
||||
return codePtr - 2;
|
||||
emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
|
||||
return start;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
|
@ -223,8 +223,27 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
|
||||
}
|
||||
|
||||
// Destroys both arg registers
|
||||
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog)
|
||||
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
|
||||
{
|
||||
#if defined(_M_X64)
|
||||
if (!Core::g_CoreStartupParameter.bMMU &&
|
||||
Core::g_CoreStartupParameter.bFastmem &&
|
||||
!(flags & (SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_FASTMEM))
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
&& !Core::g_CoreStartupParameter.bEnableDebugging
|
||||
#endif
|
||||
)
|
||||
{
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
|
||||
if (accessSize == 8)
|
||||
{
|
||||
NOP(1);
|
||||
NOP(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (offset)
|
||||
ADD(32, R(reg_addr), Imm32((u32)offset));
|
||||
|
||||
@ -245,6 +264,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
||||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z);
|
||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
|
||||
bool noProlog = flags & SAFE_WRITE_NO_PROLOG;
|
||||
bool swap = !(flags & SAFE_WRITE_NO_SWAP);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break;
|
||||
@ -257,7 +278,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
|
||||
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
|
||||
{
|
||||
if (false && cpu_info.bSSSE3) {
|
||||
// This path should be faster but for some reason it causes errors so I've disabled it.
|
||||
@ -290,7 +311,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
|
||||
} else {
|
||||
MOVSS(M(&float_buffer), xmm_value);
|
||||
MOV(32, R(EAX), M(&float_buffer));
|
||||
SafeWriteRegToReg(EAX, reg_addr, 32, 0, true);
|
||||
SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,10 +16,16 @@ public:
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
|
||||
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
|
||||
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true, bool noProlog = false);
|
||||
enum SafeWriteFlags
|
||||
{
|
||||
SAFE_WRITE_NO_SWAP = 1,
|
||||
SAFE_WRITE_NO_PROLOG = 2,
|
||||
SAFE_WRITE_NO_FASTMEM = 4
|
||||
};
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0);
|
||||
|
||||
// Trashes both inputs and EAX.
|
||||
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr);
|
||||
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
|
||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
|
Loading…
x
Reference in New Issue
Block a user