From 40b7cc925254d113ec87366db155707df313bbf9 Mon Sep 17 00:00:00 2001 From: degasus <wickmarkus@web.de> Date: Tue, 7 Feb 2017 09:27:30 +0100 Subject: [PATCH] JitArm64: Use a custom stack with proper guard pages. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 50 ++++++++++++++++++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 8 ++-- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 17 +++++-- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 190667daad..9cb3940de0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -26,7 +26,15 @@ using namespace Arm64Gen; -static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16; +constexpr size_t CODE_SIZE = 1024 * 1024 * 32; +constexpr size_t FARCODE_SIZE = 1024 * 1024 * 16; +constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48; + +constexpr size_t STACK_SIZE = 2 * 1024 * 1024; +constexpr size_t SAFE_STACK_SIZE = 512 * 1024; +constexpr size_t GUARD_SIZE = 0x10000; // two guards: bottom (permanent) and middle (GUARD_OFFSET) +constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE; + static bool HasCycleCounters() { // Bit needs to be set to support cycle counters @@ -38,7 +46,7 @@ static bool HasCycleCounters() void JitArm64::Init() { - size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE; + size_t child_code_size = SConfig::GetInstance().bMMU ? 
FARCODE_SIZE_MMU : FARCODE_SIZE; AllocCodeSpace(CODE_SIZE + child_code_size); AddChildCodeSpace(&farcode, child_code_size); jo.enableBlocklink = true; @@ -56,6 +64,7 @@ void JitArm64::Init() analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW); m_enable_blr_optimization = true; + AllocStack(); GenerateAsm(); m_supports_cycle_counter = HasCycleCounters(); @@ -78,6 +87,7 @@ void JitArm64::Shutdown() { FreeCodeSpace(); blocks.Shutdown(); + FreeStack(); } void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) @@ -199,7 +209,41 @@ void JitArm64::ResetStack() return; LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); - SUB(SP, X0, 16); + ADD(SP, X0, 0); +} + +void JitArm64::AllocStack() +{ + if (!m_enable_blr_optimization) + return; + +#ifndef _WIN32 + m_stack_base = static_cast<u8*>(Common::AllocateMemoryPages(STACK_SIZE)); + if (!m_stack_base) + { + m_enable_blr_optimization = false; + return; + } + + m_stack_pointer = m_stack_base + STACK_SIZE; // Top of the mapping; the stack grows downwards. + Common::ReadProtectMemory(m_stack_base, GUARD_SIZE); // Bottom guard. + Common::ReadProtectMemory(m_stack_base + GUARD_OFFSET, GUARD_SIZE); // Middle guard. +#else + // On Windows we just keep using the system stack and reserve a large amount of memory at the end + // of the stack. 
+ ULONG reserveSize = SAFE_STACK_SIZE; + SetThreadStackGuarantee(&reserveSize); +#endif +} + +void JitArm64::FreeStack() +{ +#ifndef _WIN32 + if (m_stack_base) + Common::FreeMemoryPages(m_stack_base, STACK_SIZE); + m_stack_base = nullptr; + m_stack_pointer = nullptr; +#endif } void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 92b5ffa4c1..e0613f82b9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -18,9 +18,6 @@ #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/PPCAnalyst.h" -constexpr size_t CODE_SIZE = 1024 * 1024 * 32; -constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48; - class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonAsmRoutinesBase { public: @@ -191,6 +188,9 @@ private: bool m_supports_cycle_counter; bool m_enable_blr_optimization; + u8* m_stack_base = nullptr; + u8* m_stack_pointer = nullptr; + u8* m_saved_stack_pointer = nullptr; void EmitResetCycleCounters(); void EmitGetCycles(Arm64Gen::ARM64Reg reg); @@ -226,6 +226,8 @@ private: void DoDownCount(); void Cleanup(); void ResetStack(); + void AllocStack(); + void FreeStack(); // AsmRoutines void GenerateAsm(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 41979be0fa..bd35f2ace4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -28,14 +28,24 @@ void JitArm64::GenerateAsm() MOVP2R(PPC_REG, &PowerPC::ppcState); - // Store the stack pointer, so we can reset it if the BLR optimization fails. + // Swap the stack pointer, so we have proper guard pages. 
ADD(X0, SP, 0); - STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); + MOVP2R(X1, &m_saved_stack_pointer); + STR(INDEX_UNSIGNED, X0, X1, 0); + MOVP2R(X1, &m_stack_pointer); + LDR(INDEX_UNSIGNED, X0, X1, 0); + FixupBranch no_fake_stack = CBZ(X0); + ADD(SP, X0, 0); + SetJumpTarget(no_fake_stack); // Push {nullptr; -1} as invalid destination on the stack. MOVI2R(X0, 0xFFFFFFFF); STP(INDEX_PRE, ZR, X0, SP, -16); + // Store the stack pointer, so we can reset it if the BLR optimization fails. + ADD(X0, SP, 0); + STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); + // The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance(). // Advance() does an exception check so we don't know what PC to use until afterwards. FixupBranch to_start_of_timing_slice = B(); @@ -161,7 +171,8 @@ void JitArm64::GenerateAsm() SetJumpTarget(Exit); // Reset the stack pointer, as the BLR optimization have touched it. - LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); + MOVP2R(X1, &m_saved_stack_pointer); + LDR(INDEX_UNSIGNED, X0, X1, 0); ADD(SP, X0, 0); ABI_PopRegisters(regs_to_save);