mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-26 21:35:28 +00:00
JIT: use far code emitter in more places
This commit is contained in:
parent
c5381bae66
commit
54e26f64c6
@ -167,11 +167,13 @@ void Jit64::Init()
|
||||
|
||||
trampolines.Init();
|
||||
AllocCodeSpace(CODE_SIZE);
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
blocks.Init();
|
||||
asm_routines.Init();
|
||||
|
||||
// important: do this *after* generating the global asm routines, because we can't use farcode in them.
|
||||
// if they did use farcode, it would crash, because the farcode functions get cleared on JIT clears.
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
code_block.m_stats = &js.st;
|
||||
code_block.m_gpa = &js.gpa;
|
||||
code_block.m_fpa = &js.fpa;
|
||||
|
@ -279,18 +279,20 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
||||
ADD(32, R(RSCRATCH), gpr.R(a));
|
||||
AND(32, R(RSCRATCH), Imm32(~31));
|
||||
TEST(32, R(RSCRATCH), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z, true);
|
||||
FixupBranch slow = J_CC(CC_NZ, true);
|
||||
|
||||
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
|
||||
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(slow);
|
||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
FixupBranch exit = J(true);
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
SwitchToNearCode();
|
||||
PXOR(XMM0, R(XMM0));
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
||||
|
@ -271,11 +271,11 @@ void JitIL::Init()
|
||||
|
||||
trampolines.Init();
|
||||
AllocCodeSpace(CODE_SIZE);
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
blocks.Init();
|
||||
asm_routines.Init();
|
||||
|
||||
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
|
||||
|
||||
code_block.m_stats = &js.st;
|
||||
code_block.m_gpa = &js.gpa;
|
||||
code_block.m_fpa = &js.fpa;
|
||||
|
@ -351,7 +351,14 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
||||
}
|
||||
TEST(32, addr_loc, Imm32(mem_mask));
|
||||
|
||||
FixupBranch fast = J_CC(CC_Z, true);
|
||||
FixupBranch slow, exit;
|
||||
slow = J_CC(CC_NZ, farcode.Enabled());
|
||||
UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
|
||||
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
@ -385,10 +392,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
||||
}
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
UnsafeLoadToReg(reg_value, addr_loc, accessSize, 0, signExtend);
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
}
|
||||
@ -470,12 +478,21 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
||||
}
|
||||
#endif
|
||||
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
|
||||
FixupBranch slow, exit;
|
||||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z, true);
|
||||
slow = J_CC(CC_NZ, farcode.Enabled());
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
switch (accessSize)
|
||||
{
|
||||
@ -493,9 +510,11 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
@ -659,15 +678,17 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
||||
// to save an instruction, since diverting a few more floats to the slow path can't hurt much.
|
||||
SUB(8, R(RSCRATCH), Imm8(0x6D));
|
||||
CMP(8, R(RSCRATCH), Imm8(0x3));
|
||||
FixupBranch x87Conversion = J_CC(CC_BE);
|
||||
FixupBranch x87Conversion = J_CC(CC_BE, true);
|
||||
CVTSD2SS(dst, R(src));
|
||||
FixupBranch continue1 = J();
|
||||
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(x87Conversion);
|
||||
MOVSD(M(&temp64), src);
|
||||
FLD(64, M(&temp64));
|
||||
FSTP(32, M(&temp32));
|
||||
MOVSS(dst, M(&temp32));
|
||||
FixupBranch continue1 = J(true);
|
||||
SwitchToNearCode();
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
// We'd normally need to MOVDDUP here to put the single in the top half of the output register too, but
|
||||
@ -696,16 +717,17 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
||||
// through the slow path (0x00800000), but the performance effects of that should be negligible.
|
||||
SUB(32, R(gprsrc), Imm8(1));
|
||||
TEST(32, R(gprsrc), Imm32(0x7f800000));
|
||||
|
||||
FixupBranch x87Conversion = J_CC(CC_Z);
|
||||
FixupBranch x87Conversion = J_CC(CC_Z, true);
|
||||
CVTSS2SD(dst, R(dst));
|
||||
FixupBranch continue1 = J();
|
||||
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(x87Conversion);
|
||||
MOVSS(M(&temp32), dst);
|
||||
FLD(32, M(&temp32));
|
||||
FSTP(64, M(&temp64));
|
||||
MOVSD(dst, M(&temp64));
|
||||
FixupBranch continue1 = J(true);
|
||||
SwitchToNearCode();
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
MOVDDUP(dst, R(dst));
|
||||
|
@ -36,9 +36,12 @@ namespace MMIO { class Mapping; }
|
||||
// exception branches.
|
||||
class FarCodeCache : public Gen::X64CodeBlock
|
||||
{
|
||||
private:
|
||||
bool m_enabled = false;
|
||||
public:
|
||||
void Init(int size) { AllocCodeSpace(size); }
|
||||
void Shutdown() { FreeCodeSpace(); }
|
||||
bool Enabled() { return m_enabled; }
|
||||
void Init(int size) { AllocCodeSpace(size); m_enabled = true; }
|
||||
void Shutdown() { FreeCodeSpace(); m_enabled = false; }
|
||||
};
|
||||
|
||||
// Like XCodeBlock but has some utilities for memory access.
|
||||
|
Loading…
x
Reference in New Issue
Block a user