JitArm64: Use a custom stack with proper guard pages.

This commit is contained in:
degasus 2017-02-07 09:27:30 +01:00
parent 657639899f
commit 40b7cc9252
3 changed files with 66 additions and 9 deletions

View File

@ -26,7 +26,15 @@
using namespace Arm64Gen;
static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
// Size of the main code region; Init() allocates CODE_SIZE plus the far-code size in one block.
constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
// Far-code (child code space) size chosen by Init() when SConfig's bMMU is false.
constexpr size_t FARCODE_SIZE = 1024 * 1024 * 16;
// Far-code (child code space) size chosen by Init() when SConfig's bMMU is true.
constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48;
// Total size of the custom stack region allocated by AllocStack() on non-Windows hosts.
constexpr size_t STACK_SIZE = 2 * 1024 * 1024;
// Size of the region above the middle guard; on Windows this is the amount passed to
// SetThreadStackGuarantee() instead.
constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
// Size of each protected range. AllocStack() protects two of them: one at the very start of the
// region (bottom, permanent) and one starting at GUARD_OFFSET (middle).
constexpr size_t GUARD_SIZE = 0x10000;
// Offset of the middle guard from the start of the region: it sits directly below the topmost
// SAFE_STACK_SIZE bytes.
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
static bool HasCycleCounters()
{
// Bit needs to be set to support cycle counters
@ -38,7 +46,7 @@ static bool HasCycleCounters()
void JitArm64::Init()
{
size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
AllocCodeSpace(CODE_SIZE + child_code_size);
AddChildCodeSpace(&farcode, child_code_size);
jo.enableBlocklink = true;
@ -56,6 +64,7 @@ void JitArm64::Init()
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
m_enable_blr_optimization = true;
AllocStack();
GenerateAsm();
m_supports_cycle_counter = HasCycleCounters();
@ -78,6 +87,7 @@ void JitArm64::Shutdown()
{
FreeCodeSpace();
blocks.Shutdown();
FreeStack();
}
void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
@ -199,7 +209,41 @@ void JitArm64::ResetStack()
return;
LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
SUB(SP, X0, 16);
ADD(SP, X0, 0);
}
void JitArm64::AllocStack()
{
if (!m_enable_blr_optimization)
return;
#ifndef _WIN32
m_stack_base = static_cast<u8*>(Common::AllocateMemoryPages(STACK_SIZE));
if (!m_stack_base)
{
m_enable_blr_optimization = false;
return;
}
m_stack_pointer = m_stack_base + GUARD_OFFSET;
Common::ReadProtectMemory(m_stack_base, GUARD_SIZE);
Common::ReadProtectMemory(m_stack_pointer, GUARD_SIZE);
#else
// For windows we just keep using the system stack and reserve a large amount of memory at the end
// of the stack.
ULONG reserveSize = SAFE_STACK_SIZE;
SetThreadStackGuarantee(&reserveSize);
#endif
}
// Releases the custom stack region set up by AllocStack().
//
// Non-Windows: frees the STACK_SIZE-byte region if one was allocated, then clears both
// m_stack_base and m_stack_pointer. Safe to call when no region was ever allocated.
// Windows: nothing to do, because no custom region is allocated there.
void JitArm64::FreeStack()
{
#ifndef _WIN32
  if (m_stack_base != nullptr)
  {
    Common::FreeMemoryPages(m_stack_base, STACK_SIZE);
  }

  // Reset both pointers so nothing keeps referring to the released region.
  m_stack_pointer = nullptr;
  m_stack_base = nullptr;
#endif
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)

View File

@ -18,9 +18,6 @@
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48;
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonAsmRoutinesBase
{
public:
@ -191,6 +188,9 @@ private:
bool m_supports_cycle_counter;
bool m_enable_blr_optimization;
// Start of the custom stack region allocated by AllocStack() and released by FreeStack();
// stays nullptr when no region was allocated.
u8* m_stack_base = nullptr;
// Initial stack pointer for generated code. The code emitted by GenerateAsm() loads this value
// and moves it into SP only if it is non-zero; otherwise the incoming SP is kept.
u8* m_stack_pointer = nullptr;
// Slot where the code emitted by GenerateAsm() stores the incoming SP on entry and from which
// it reloads SP on the exit path.
u8* m_saved_stack_pointer = nullptr;
void EmitResetCycleCounters();
void EmitGetCycles(Arm64Gen::ARM64Reg reg);
@ -226,6 +226,8 @@ private:
void DoDownCount();
void Cleanup();
void ResetStack();
void AllocStack();
void FreeStack();
// AsmRoutines
void GenerateAsm();

View File

@ -28,14 +28,24 @@ void JitArm64::GenerateAsm()
MOVP2R(PPC_REG, &PowerPC::ppcState);
// Store the stack pointer, so we can reset it if the BLR optimization fails.
// Swap the stack pointer, so we have proper guard pages.
ADD(X0, SP, 0);
STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
MOVP2R(X1, &m_saved_stack_pointer);
STR(INDEX_UNSIGNED, X0, X1, 0);
MOVP2R(X1, &m_stack_pointer);
LDR(INDEX_UNSIGNED, X0, X1, 0);
FixupBranch no_fake_stack = CBZ(X0);
ADD(SP, X0, 0);
SetJumpTarget(no_fake_stack);
// Push {nullptr; -1} as invalid destination on the stack.
MOVI2R(X0, 0xFFFFFFFF);
STP(INDEX_PRE, ZR, X0, SP, -16);
// Store the stack pointer, so we can reset it if the BLR optimization fails.
ADD(X0, SP, 0);
STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
// The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().
// Advance() does an exception check so we don't know what PC to use until afterwards.
FixupBranch to_start_of_timing_slice = B();
@ -161,7 +171,8 @@ void JitArm64::GenerateAsm()
SetJumpTarget(Exit);
// Reset the stack pointer, as the BLR optimization have touched it.
LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
MOVP2R(X1, &m_saved_stack_pointer);
LDR(INDEX_UNSIGNED, X0, X1, 0);
ADD(SP, X0, 0);
ABI_PopRegisters(regs_to_save);