Support for dynamic BAT modification (dynamic-bat).

Fundamentally, all this does is enforce the invariant that we always
translate effective addresses based on the current BAT registers and
page table before we do anything else with them.

This change can be logically divided into three parts.  The first part is
creating a table to represent the current BAT state, and keeping it up to
date (PowerPC::IBATUpdated, PowerPC::DBATUpdated, etc.).  This does
nothing by itself, but it's necessary for the other parts.

The second part (mostly in MMU.cpp) is simply removing all the hardcoded
checks for specific untranslated addresses, and consistently translating
addresses using the current BAT configuration. Very straightforward, but a
lot of code changes because we hardcoded assumptions all over the place.

The third part (mostly in Memmap.cpp) is making the fastmem arena reflect
the current BAT configuration.  We do this by redoing the mapping (calling
memmap()) based on the BAT table whenever it changes.

One additional minor change is that translation can fail in two ways:
either the segment is a direct-store segment, or the page table lookup failed.
The distinction usually doesn't matter, but it does affect the behavior of
cache instructions such as dcbz.
This commit is contained in:
magumagu 2016-06-25 18:58:09 -07:00 committed by degasus
parent 3e0355e7f6
commit d557310371
20 changed files with 689 additions and 598 deletions

View File

@ -157,118 +157,3 @@ u8* MemArena::FindMemoryBase()
return static_cast<u8*>(base);
#endif
}
// yeah, this could also be done in like two bitwise ops...
// Skips the current loop iteration when the view requires a feature
// (Wii-only memory or fake VMEM) that was not requested in a_flags.
// NOTE: expands to two plain statements, so it is only valid directly
// inside a loop body (both users below call it that way).
#define SKIP(a_flags, b_flags) \
if (!(a_flags & MV_WII_ONLY) && (b_flags & MV_WII_ONLY)) \
continue; \
if (!(a_flags & MV_FAKE_VMEM) && (b_flags & MV_FAKE_VMEM)) \
continue;
// Attempts to create every requested view at a fixed offset from `base`.
// On failure, releases everything mapped so far and returns false so the
// caller could retry with a different base.
static bool Memory_TryBase(u8* base, MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
  // OK, we know where to find free space. Now grab it!
  // We just mimic the popular BAT setup.
  for (int index = 0; index < num_views; index++)
  {
    MemoryView* view = &views[index];
    SKIP(flags, view->flags);

#if _ARCH_64
    // On 64-bit, we map the same file position multiple times, so we
    // don't need the software fallback for the mirrors.
    u8* const view_base = base + view->virtual_address;
    const bool use_sw_mirror = false;
#else
    // On 32-bit, we don't have the actual address space to store all
    // the mirrors, so we just map the fallbacks somewhere in our address
    // space and use the software fallbacks for mirroring.
    u8* const view_base = base + (view->virtual_address & 0x3FFFFFFF);
    const bool use_sw_mirror = true;
#endif

    if (use_sw_mirror && (view->flags & MV_MIRROR_PREVIOUS))
    {
      // Software mirror: reuse the previous view's pointer directly.
      view->view_ptr = views[index - 1].view_ptr;
    }
    else
    {
      view->mapped_ptr = arena->CreateView(view->shm_position, view->size, view_base);
      view->view_ptr = view->mapped_ptr;
    }

    if (!view->view_ptr)
    {
      // Argh! ERROR! Free what we grabbed so far so we can try again.
      MemoryMap_Shutdown(views, index + 1, flags, arena);
      return false;
    }

    if (view->out_ptr)
      *(view->out_ptr) = (u8*)view->view_ptr;
  }
  return true;
}
// Assigns each active view its offset within the shared memory segment.
// Mirror views reuse the previous view's offset.  Returns the total
// segment size needed to back all of the views.
static u32 MemoryMap_InitializeViews(MemoryView* views, int num_views, u32 flags)
{
  u32 next_position = 0;
  u32 prev_position = 0;
  for (int index = 0; index < num_views; index++)
  {
    MemoryView& view = views[index];
    // Zero the mapping pointer to be sure.
    view.mapped_ptr = nullptr;
    SKIP(flags, view.flags);
    // A mirror shares the backing store of the view before it.
    if (view.flags & MV_MIRROR_PREVIOUS)
      next_position = prev_position;
    view.shm_position = next_position;
    prev_position = next_position;
    next_position += view.size;
  }
  return next_position;
}
// Uses a memory arena to set up an emulator-friendly memory map from the
// passed-in list of MemoryView structures.  Returns the mapping base
// address; panics and exits if the views cannot be mapped.
u8* MemoryMap_Setup(MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
  const u32 total_mem = MemoryMap_InitializeViews(views, num_views, flags);
  arena->GrabSHMSegment(total_mem);

  // Now, create views in high memory where there's plenty of space.
  u8* const base = MemArena::FindMemoryBase();
  // This really shouldn't fail - in 64-bit, there will always be enough
  // address space.
  if (Memory_TryBase(base, views, num_views, flags, arena))
    return base;

  PanicAlert("MemoryMap_Setup: Failed finding a memory base.");
  exit(0);
  return nullptr;  // Unreachable; keeps the compiler happy.
}
// Releases every view created by MemoryMap_Setup.  Mirrored views may
// share the same mapping, so each pointer is only released once.
void MemoryMap_Shutdown(MemoryView* views, int num_views, u32 flags, MemArena* arena)
{
  std::set<void*> already_released;
  for (int index = 0; index < num_views; index++)
  {
    MemoryView& view = views[index];
    if (!view.mapped_ptr || already_released.count(view.mapped_ptr))
      continue;
    arena->ReleaseView(view.mapped_ptr, view.size);
    already_released.insert(view.mapped_ptr);
    view.mapped_ptr = nullptr;
  }
}

View File

@ -35,26 +35,3 @@ private:
int fd;
#endif
};
// Flags describing properties of a MemoryView; tested against the flags
// argument of MemoryMap_Setup (see the SKIP macro in the .cpp file).
enum
{
  MV_MIRROR_PREVIOUS = 1,  // View shares the previous entry's backing store.
  MV_FAKE_VMEM = 2,        // View is only mapped when fake VMEM is enabled.
  MV_WII_ONLY = 4,         // View is only mapped when emulating a Wii.
};
// Describes one region of address space for MemoryMap_Setup to map.
struct MemoryView
{
  u8** out_ptr;         // If non-null, receives the resulting view pointer.
  u64 virtual_address;  // Offset of the view from the mapping base address.
  u32 size;             // Size of the view in bytes.
  u32 flags;            // Combination of the MV_* flags above.
  void* mapped_ptr;     // Pointer returned by MemArena::CreateView, if any.
  void* view_ptr;       // Usable pointer; may alias a previous view's mapping.
  u32 shm_position;     // Offset of the view's backing store in the SHM segment.
};
// Uses a memory arena to set up an emulator-friendly memory map according to
// a passed-in list of MemoryView structures.
u8* MemoryMap_Setup(MemoryView* views, int num_views, u32 flags, MemArena* arena);
// Releases the views created by MemoryMap_Setup.
void MemoryMap_Shutdown(MemoryView* views, int num_views, u32 flags, MemArena* arena);

View File

@ -237,6 +237,8 @@ bool CBoot::Load_BS2(const std::string& _rBootROMFilename)
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::ppcState.spr[SPR_DBAT3U] = 0xfff0001f;
PowerPC::ppcState.spr[SPR_DBAT3L] = 0xfff00001;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
PC = 0x81200150;
return true;
}
@ -377,6 +379,8 @@ bool CBoot::BootUp()
PowerPC::ppcState.spr[SPR_DBAT4L] = 0x10000002;
PowerPC::ppcState.spr[SPR_DBAT5U] = 0xd0001fff;
PowerPC::ppcState.spr[SPR_DBAT5L] = 0x1000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
dolLoader.Load();
PC = dolLoader.GetEntryPoint();

View File

@ -52,6 +52,8 @@ bool CBoot::EmulatedBS2_GC(bool skipAppLoader)
PowerPC::ppcState.spr[SPR_DBAT0L] = 0x00000002;
PowerPC::ppcState.spr[SPR_DBAT1U] = 0xc0001fff;
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
// Write necessary values
// Here we write values to memory that the apploader does not take care of. Game info goes
@ -298,11 +300,11 @@ bool CBoot::SetupWiiMemory(DiscIO::Country country)
Memory::Write_U32(0x8179d500, 0x00003110); // Init
Memory::Write_U32(0x04000000, 0x00003118); // Unknown
Memory::Write_U32(0x04000000, 0x0000311c); // BAT
Memory::Write_U32(0x93400000, 0x00003120); // BAT
Memory::Write_U32(0x93600000, 0x00003120); // BAT
Memory::Write_U32(0x90000800, 0x00003124); // Init - MEM2 low
Memory::Write_U32(0x93ae0000, 0x00003128); // Init - MEM2 high
Memory::Write_U32(0x93ae0000, 0x00003130); // IOS MEM2 low
Memory::Write_U32(0x93b00000, 0x00003134); // IOS MEM2 high
Memory::Write_U32(0x935e0000, 0x00003128); // Init - MEM2 high
Memory::Write_U32(0x935e0000, 0x00003130); // IOS MEM2 low
Memory::Write_U32(0x93600000, 0x00003134); // IOS MEM2 high
Memory::Write_U32(0x00000012, 0x00003138); // Console type
// 40 is copied from 88 after running apploader
Memory::Write_U32(0x00090204, 0x00003140); // IOS revision (IOS9, v2.4)
@ -371,6 +373,8 @@ bool CBoot::EmulatedBS2_Wii()
PowerPC::ppcState.spr[SPR_DBAT4L] = 0x10000002;
PowerPC::ppcState.spr[SPR_DBAT5U] = 0xd0001fff;
PowerPC::ppcState.spr[SPR_DBAT5L] = 0x1000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
Memory::Write_U32(0x4c000064, 0x00000300); // Write default DSI Handler: rfi
Memory::Write_U32(0x4c000064, 0x00000800); // Write default FPU Handler: rfi

View File

@ -410,6 +410,8 @@ void FifoPlayer::LoadMemory()
PowerPC::ppcState.spr[SPR_DBAT0L] = 0x00000002;
PowerPC::ppcState.spr[SPR_DBAT1U] = 0xc0001fff;
PowerPC::ppcState.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::DBATUpdated();
PowerPC::IBATUpdated();
SetupFifo();

View File

@ -96,6 +96,30 @@ bool IsInitialized()
return m_IsInitialized;
}
namespace
{
// Flags describing properties of a PhysicalMemoryRegion; a region is only
// mapped when all of its flags are present in the flags computed in Init().
enum
{
  MV_FAKE_VMEM = 1,  // Region only exists when fake VMEM is enabled.
  MV_WII_ONLY = 2,   // Region only exists when emulating a Wii.
};
// One physically-addressed region backed by the shared memory segment.
struct PhysicalMemoryRegion
{
  u8** out_pointer;      // Receives the pointer to the mapped region.
  u32 physical_address;  // Base physical address the region represents.
  u32 size;              // Size of the region in bytes.
  u32 flags;             // Combination of the flags above.
  u32 shm_position;      // Offset of the region's backing store in the SHM segment.
};
// Records one view mapped into the logical (translated) address space, so
// it can be released when the BAT configuration changes.
struct LogicalMemoryView
{
  void* mapped_pointer;
  u32 mapped_size;
};
}
// Dolphin allocates memory to represent four regions:
// - 32MB RAM (actually 24MB on hardware), available on Gamecube and Wii
// - 64MB "EXRAM", RAM only available on Wii
@ -116,28 +140,12 @@ bool IsInitialized()
// [0x08000000, 0x0C000000) - EFB "mapping" (not handled here)
// [0x0C000000, 0x0E000000) - MMIO etc. (not handled here)
// [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower)
//
// The 4GB starting at logical_base represents access from the CPU
// with address translation turned on. Instead of changing the mapping
// based on the BAT registers, we approximate the common BAT configuration
// used by games:
// [0x00000000, 0x02000000) - 32MB RAM, cached access, normally only mapped
// during startup by Wii WADs
// [0x02000000, 0x08000000) - Mirrors of 32MB RAM (not implemented here)
// [0x40000000, 0x50000000) - FakeVMEM
// [0x70000000, 0x80000000) - FakeVMEM
// [0x80000000, 0x82000000) - 32MB RAM, cached access
// [0x82000000, 0x88000000) - Mirrors of 32MB RAM (not implemented here)
// [0x90000000, 0x94000000) - 64MB RAM, Wii-only, cached access
// [0xC0000000, 0xC2000000) - 32MB RAM, uncached access
// [0xC2000000, 0xC8000000) - Mirrors of 32MB RAM (not implemented here)
// [0xC8000000, 0xCC000000) - EFB "mapping" (not handled here)
// [0xCC000000, 0xCE000000) - MMIO etc. (not handled here)
// [0xD0000000, 0xD4000000) - 64MB RAM, Wii-only, uncached access
// [0x7E000000, 0x80000000) - FakeVMEM
// [0xE0000000, 0xE0040000) - 256KB locked L1
//
// TODO: We shouldn't hardcode this mapping; we can generate it dynamically
// based on the BAT registers.
// The 4GB starting at logical_base represents access from the CPU
// with address translation turned on. This mapping is computed based
// on the BAT registers.
//
// Each of these 4GB regions is followed by 4GB of empty space so overflows
// in address computation in the JIT don't access the wrong memory.
@ -152,18 +160,14 @@ bool IsInitialized()
//
// TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't
// be backed by actual memory.
static MemoryView views[] = {
static PhysicalMemoryRegion physical_regions[] = {
{&m_pRAM, 0x00000000, RAM_SIZE, 0},
{nullptr, 0x200000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{nullptr, 0x280000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{nullptr, 0x2C0000000, RAM_SIZE, MV_MIRROR_PREVIOUS},
{&m_pL1Cache, 0x2E0000000, L1_CACHE_SIZE, 0},
{&m_pFakeVMEM, 0x27E000000, FAKEVMEM_SIZE, MV_FAKE_VMEM},
{&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, 0},
{&m_pFakeVMEM, 0x7E000000, FAKEVMEM_SIZE, MV_FAKE_VMEM},
{&m_pEXRAM, 0x10000000, EXRAM_SIZE, MV_WII_ONLY},
{nullptr, 0x290000000, EXRAM_SIZE, MV_WII_ONLY | MV_MIRROR_PREVIOUS},
{nullptr, 0x2D0000000, EXRAM_SIZE, MV_WII_ONLY | MV_MIRROR_PREVIOUS},
};
static const int num_views = sizeof(views) / sizeof(MemoryView);
static std::vector<LogicalMemoryView> logical_mapped_entries;
void Init()
{
@ -181,7 +185,34 @@ void Init()
flags |= MV_WII_ONLY;
if (bFakeVMEM)
flags |= MV_FAKE_VMEM;
physical_base = MemoryMap_Setup(views, num_views, flags, &g_arena);
u32 mem_size = 0;
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
region.shm_position = mem_size;
mem_size += region.size;
}
g_arena.GrabSHMSegment(mem_size);
physical_base = MemArena::FindMemoryBase();
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
u8* base = physical_base + region.physical_address;
*region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
if (!*region.out_pointer)
{
PanicAlert("MemoryMap_Setup: Failed finding a memory base.");
exit(0);
}
mem_size += region.size;
}
#ifndef _ARCH_32
logical_base = physical_base + 0x200000000;
#endif
@ -197,6 +228,55 @@ void Init()
m_IsInitialized = true;
}
// Rebuilds the logical (address-translated) fastmem mapping from the
// current DBAT table.
//
// dbat_table has one entry per (1 << PowerPC::BAT_INDEX_SHIFT)-byte chunk
// of effective address space; bit 0 marks the chunk as mapped, and masking
// off the low bits yields the translated physical address.
void UpdateLogicalMemory(u32* dbat_table)
{
  // Release the views created for the previous BAT configuration.
  for (auto& entry : logical_mapped_entries)
  {
    g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
  }
  logical_mapped_entries.clear();
  // Hoisted as unsigned constants (avoids a signed/unsigned loop comparison).
  const u32 num_entries = 1u << (32 - PowerPC::BAT_INDEX_SHIFT);
  const u32 chunk_size = 1u << PowerPC::BAT_INDEX_SHIFT;
  for (u32 i = 0; i < num_entries; ++i)
  {
    if (dbat_table[i] & 1)
    {
      u32 logical_address = i << PowerPC::BAT_INDEX_SHIFT;
      // TODO: Merge adjacent mappings to make this faster.
      u32 logical_size = chunk_size;
      u32 translated_address = dbat_table[i] & ~3;
      for (PhysicalMemoryRegion& physical_region : physical_regions)
      {
        u32 mapping_address = physical_region.physical_address;
        u32 mapping_end = mapping_address + physical_region.size;
        u32 intersection_start = std::max(mapping_address, translated_address);
        u32 intersection_end = std::min(mapping_end, translated_address + logical_size);
        if (intersection_start < intersection_end)
        {
          // Found an overlapping region; map it.
          // We only worry about one overlapping region; in theory, a logical
          // region could translate to more than one physical region, but in
          // practice, that doesn't happen.
          u32 position = physical_region.shm_position;
          if (intersection_start > mapping_address)
            position += intersection_start - mapping_address;
          u8* base = logical_base + logical_address;
          if (intersection_start > translated_address)
            base += intersection_start - translated_address;
          u32 mapped_size = intersection_end - intersection_start;
          void* mapped_pointer = g_arena.CreateView(position, mapped_size, base);
          if (!mapped_pointer)
          {
            PanicAlert("UpdateLogicalMemory: Failed to map memory region.");
            exit(0);
          }
          logical_mapped_entries.push_back({mapped_pointer, mapped_size});
          break;
        }
      }
    }
  }
}
void DoState(PointerWrap& p)
{
bool wii = SConfig::GetInstance().bWii;
@ -219,7 +299,18 @@ void Shutdown()
flags |= MV_WII_ONLY;
if (bFakeVMEM)
flags |= MV_FAKE_VMEM;
MemoryMap_Shutdown(views, num_views, flags, &g_arena);
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
g_arena.ReleaseView(*region.out_pointer, region.size);
*region.out_pointer = 0;
}
for (auto& entry : logical_mapped_entries)
{
g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
}
logical_mapped_entries.clear();
g_arena.ReleaseSHMSegment();
physical_base = nullptr;
logical_base = nullptr;
@ -233,7 +324,9 @@ void Clear()
memset(m_pRAM, 0, RAM_SIZE);
if (m_pL1Cache)
memset(m_pL1Cache, 0, L1_CACHE_SIZE);
if (SConfig::GetInstance().bWii && m_pEXRAM)
if (m_pFakeVMEM)
memset(m_pFakeVMEM, 0, FAKEVMEM_SIZE);
if (m_pEXRAM)
memset(m_pEXRAM, 0, EXRAM_SIZE);
}

View File

@ -57,13 +57,6 @@ enum
IO_SIZE = 0x00010000,
EXRAM_SIZE = 0x04000000,
EXRAM_MASK = EXRAM_SIZE - 1,
ADDR_MASK_HW_ACCESS = 0x0c000000,
ADDR_MASK_MEM1 = 0x20000000,
#if _ARCH_32
MEMVIEW32_MASK = 0x3FFFFFFF,
#endif
};
// MMIO mapping object.
@ -75,6 +68,8 @@ void Init();
void Shutdown();
void DoState(PointerWrap& p);
void UpdateLogicalMemory(u32* dbat_table);
void Clear();
bool AreMemoryBreakpointsActivated();

View File

@ -962,6 +962,11 @@ IPCCommandResult CWII_IPC_HLE_Device_es::IOCtlV(u32 _CommandAddress)
if (pDolLoader->IsValid())
{
pDolLoader->Load(); // TODO: Check why sysmenu does not load the DOL correctly
// WADs start with address translation off at the given entry point.
//
// The state of other CPU registers (like the BAT registers) doesn't matter much
// because the WAD initializes everything itself anyway.
MSR = 0;
PC = pDolLoader->GetEntryPoint();
bSuccess = true;
}

View File

@ -297,6 +297,15 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
// TODO: emulate locked cache and DMA bits.
break;
case SPR_HID4:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "HID4 updated %x %x", oldValue, rSPR(iIndex));
PowerPC::IBATUpdated();
PowerPC::DBATUpdated();
}
break;
case SPR_WPAR:
_assert_msg_(POWERPC, rGPR[_inst.RD] == 0x0C008000, "Gather pipe @ %08x", PC);
GPFifo::ResetGatherPipe();
@ -354,6 +363,52 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
case SPR_XER:
SetXER(rSPR(iIndex));
break;
case SPR_DBAT0L:
case SPR_DBAT0U:
case SPR_DBAT1L:
case SPR_DBAT1U:
case SPR_DBAT2L:
case SPR_DBAT2U:
case SPR_DBAT3L:
case SPR_DBAT3U:
case SPR_DBAT4L:
case SPR_DBAT4U:
case SPR_DBAT5L:
case SPR_DBAT5U:
case SPR_DBAT6L:
case SPR_DBAT6U:
case SPR_DBAT7L:
case SPR_DBAT7U:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "DBAT updated %d %x %x", iIndex, oldValue, rSPR(iIndex));
PowerPC::DBATUpdated();
}
break;
case SPR_IBAT0L:
case SPR_IBAT0U:
case SPR_IBAT1L:
case SPR_IBAT1U:
case SPR_IBAT2L:
case SPR_IBAT2U:
case SPR_IBAT3L:
case SPR_IBAT3U:
case SPR_IBAT4L:
case SPR_IBAT4U:
case SPR_IBAT5L:
case SPR_IBAT5U:
case SPR_IBAT6L:
case SPR_IBAT6U:
case SPR_IBAT7L:
case SPR_IBAT7U:
if (oldValue != rSPR(iIndex))
{
WARN_LOG(POWERPC, "IBAT updated %d %x %x", iIndex, oldValue, rSPR(iIndex));
PowerPC::IBATUpdated();
}
break;
}
}

View File

@ -365,38 +365,46 @@ void Jit64::dcbz(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
MOV(32, R(RSCRATCH), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH), gpr.R(a));
AND(32, R(RSCRATCH), Imm32(~31));
TEST(32, R(RSCRATCH), Imm32(mem_mask));
FixupBranch slow = J_CC(CC_NZ, true);
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
// supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure.
if (!UReg_MSR(MSR).DR)
{
// If address translation is turned off, just call the general-case code.
AND(32, R(RSCRATCH), Imm32(~31));
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR((void*)&PowerPC::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
return;
}
// Perform lookup to see if we can use fast path.
MOV(32, R(RSCRATCH2), R(RSCRATCH));
SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT));
TEST(32, MScaled(RSCRATCH, SCALE_4, (u32)(u64)PowerPC::dbat_table), Imm32(2));
FixupBranch slow = J_CC(CC_Z, true);
// Fast path: compute full address, then zero out 32 bytes of memory.
AND(32, R(RSCRATCH2), Imm8(~31));
PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH2, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH2, SCALE_1, 16), XMM0);
// Slow path: mask the address, then call the general-case code.
SwitchToFarCode();
SetJumpTarget(slow);
AND(32, R(RSCRATCH2), Imm32(~31));
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
ABI_CallFunctionR(&PowerPC::ClearCacheLine, RSCRATCH2);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
FixupBranch exit = J(true);
FixupBranch end = J(true);
SwitchToNearCode();
// Mask out the address so we don't write to MEM1 out of bounds
// FIXME: Work out why the AGP disc writes out of bounds
if (!SConfig::GetInstance().bWii)
AND(32, R(RSCRATCH), Imm32(Memory::RAM_MASK));
PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
SetJumpTarget(exit);
SetJumpTarget(end);
}
void Jit64::stX(UGeckoInstruction inst)

View File

@ -25,6 +25,9 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && !!(inst.SUBOP6 & 32));
@ -112,6 +115,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && !!(inst.SUBOP6 & 32));

View File

@ -401,7 +401,8 @@ void QuantizedMemoryRoutines::GenQuantizedStore(bool single, EQuantizeType type,
}
}
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
if (!single)
flags |= SAFE_LOADSTORE_NO_SWAP;
@ -459,7 +460,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
if (jit->jo.memcheck)
{
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD;
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags);
if (!single && (type == QUANTIZE_U8 || type == QUANTIZE_S8))
{
@ -582,7 +584,8 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
if (jit->jo.memcheck)
{
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE;
int flags = isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG;
int flags =
isInline ? 0 : SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_DR_ON;
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), size, 0, regsToSave, extend, flags);
}

View File

@ -23,6 +23,9 @@ void JitArm64::psq_l(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || !jo.fastmem);
// The asm routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
// X30 is LR
// X0 contains the scale
// X1 is the address
@ -103,6 +106,9 @@ void JitArm64::psq_st(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || !jo.fastmem);
// The asm routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
// X30 is LR
// X0 contains the scale
// X1 is the address
@ -179,6 +185,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 8, 13); // Scale
// Inline address check
// FIXME: This doesn't correctly account for the BAT configuration.
TST(addr_reg, 6, 1);
FixupBranch pass = B(CC_EQ);
FixupBranch fail = B();

View File

@ -47,6 +47,16 @@ void JitArm64::GenerateAsm()
dispatcherNoCheck = GetCodePtr();
// Make sure MEM_REG is pointing at an appropriate register, based on MSR.DR.
LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(msr));
TST(W0, 28, 1);
FixupBranch physmem = B(CC_NEQ);
MOVI2R(MEM_REG, (u64)Memory::physical_base);
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVI2R(MEM_REG, (u64)Memory::logical_base);
SetJumpTarget(membaseend);
bool assembly_dispatcher = true;
if (assembly_dispatcher)

View File

@ -212,7 +212,7 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
}
FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
BitSet32 registers_in_use, u32 mem_mask)
BitSet32 registers_in_use)
{
registers_in_use[reg_addr] = true;
if (reg_value.IsSimpleReg())
@ -227,29 +227,19 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
else
scratch = reg_addr;
// On Gamecube games with MMU, do a little bit of extra work to make sure we're not accessing the
// 0x81800000 to 0x83FFFFFF range.
// It's okay to take a shortcut and not check this range on non-MMU games, since we're already
// assuming they'll never do an invalid memory access.
// The slightly more complex check needed for Wii games using the space just above MEM1 isn't
// implemented here yet, since there are no known working Wii MMU games to test it with.
if (jit->jo.memcheck && !SConfig::GetInstance().bWii)
{
if (scratch == reg_addr)
PUSH(scratch);
else
MOV(32, R(scratch), R(reg_addr));
AND(32, R(scratch), Imm32(0x3FFFFFFF));
CMP(32, R(scratch), Imm32(0x01800000));
if (scratch == reg_addr)
POP(scratch);
return J_CC(CC_AE, farcode.Enabled());
}
if (scratch == reg_addr)
PUSH(scratch);
else
{
TEST(32, R(reg_addr), Imm32(mem_mask));
return J_CC(CC_NZ, farcode.Enabled());
}
MOV(32, R(scratch), R(reg_addr));
// Perform lookup to see if we can use fast path.
SHR(32, R(scratch), Imm8(PowerPC::BAT_INDEX_SHIFT));
TEST(32, MScaled(scratch, SCALE_4, (u32)(u64)PowerPC::dbat_table), Imm32(2));
if (scratch == reg_addr)
POP(scratch);
return J_CC(CC_Z, farcode.Enabled());
}
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
@ -305,14 +295,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
}
FixupBranch exit;
if (!slowmem)
bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || UReg_MSR(MSR).DR;
bool fast_check_address = !slowmem && dr_set;
if (fast_check_address)
{
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
if (farcode.Enabled())
SwitchToFarCode();
@ -350,7 +337,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
}
if (!slowmem)
if (fast_check_address)
{
if (farcode.Enabled())
{
@ -575,15 +562,12 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
}
}
FixupBranch slow, exit;
if (!slowmem)
FixupBranch exit;
bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || UReg_MSR(MSR).DR;
bool fast_check_address = !slowmem && dr_set;
if (fast_check_address)
{
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (farcode.Enabled())
SwitchToFarCode();
@ -629,7 +613,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
MemoryExceptionCheck();
if (!slowmem)
if (fast_check_address)
{
if (farcode.Enabled())
{

View File

@ -123,7 +123,7 @@ public:
}
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
BitSet32 registers_in_use, u32 mem_mask);
BitSet32 registers_in_use);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset = 0, bool signExtend = false);
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
@ -154,6 +154,7 @@ public:
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
// Force slowmem (used when generating fallbacks in trampolines)
SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
SAFE_LOADSTORE_DR_ON = 32,
};
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,

View File

@ -12,6 +12,9 @@ void JitILBase::psq_st(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || inst.W);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12);
IREmitter::InstLoc val;
@ -32,6 +35,9 @@ void JitILBase::psq_l(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck || inst.W);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!UReg_MSR(MSR).DR);
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12);
IREmitter::InstLoc val;

View File

@ -12,6 +12,7 @@
#include "Core/HW/GPFifo.h"
#include "Core/HW/MMIO.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/VideoBackendBase.h"
@ -89,8 +90,20 @@ static bool IsNoExceptionFlag(XCheckTLBFlag flag)
return flag == FLAG_NO_EXCEPTION || flag == FLAG_OPCODE_NO_EXCEPTION;
}
// Result of an effective-to-physical address translation attempt.
// Translation can fail in two distinct ways: the address may fall in a
// direct-store segment, or the page table lookup may fail.  The distinction
// matters for cache instructions such as dcbz.
struct TranslateAddressResult
{
  enum
  {
    BAT_TRANSLATED,
    PAGE_TABLE_TRANSLATED,
    DIRECT_STORE_SEGMENT,
    PAGE_FAULT
  } result;
  u32 address;  // Translated physical address; meaningful only on success.
  // True when translation produced a usable physical address.
  // (const-qualified so it can be called through const references.)
  bool Success() const { return result <= PAGE_TABLE_TRANSLATED; }
};
template <const XCheckTLBFlag flag>
static u32 TranslateAddress(const u32 address);
static TranslateAddressResult TranslateAddress(const u32 address);
// Nasty but necessary. Super Mario Galaxy pointer relies on this stuff.
static u32 EFB_Read(const u32 addr)
@ -142,282 +155,203 @@ static void EFB_Write(u32 data, u32 addr)
}
}
// Per-chunk translation tables for data and instruction BATs: one u32 entry
// per (1 << BAT_INDEX_SHIFT)-byte chunk of effective address space.  The low
// bits are flags (the translated address is recovered by masking with ~3):
// bit 0 is checked when building the logical fastmem mapping
// (Memory::UpdateLogicalMemory), and bit 1 is tested by the JIT fast-path
// address checks.
u32 dbat_table[1 << (32 - BAT_INDEX_SHIFT)];
u32 ibat_table[1 << (32 - BAT_INDEX_SHIFT)];

// Raises a DSI exception; called when translation of a data access fails.
static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite);
template <XCheckTLBFlag flag, typename T>
__forceinline static T ReadFromHardware(const u32 em_address)
template <XCheckTLBFlag flag, typename T, bool never_translate = false>
__forceinline static T ReadFromHardware(u32 em_address)
{
int segment = em_address >> 28;
bool performTranslation = UReg_MSR(MSR).DR;
// Quick check for an address that can't meet any of the following conditions,
// to speed up the MMU path.
if (!BitSet32(0xCFC)[segment] && performTranslation)
if (!never_translate && UReg_MSR(MSR).DR)
{
// TODO: Figure out the fastest order of tests for both read and write (they are probably
// different).
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0xC8000000)
{
if (em_address < 0xcc000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address &
0x0FFFFFFF);
}
if (segment == 0x0 || segment == 0x8 || segment == 0xC)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
// Handle EXRAM.
// TODO: Is this supposed to be mirrored like main RAM?
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
if (segment == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
return bswap((*(const T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF]));
}
}
if (Memory::bFakeVMEM && performTranslation && (segment == 0x7 || segment == 0x4))
{
// fake VMEM
return bswap((*(const T*)&Memory::m_pFakeVMEM[em_address & Memory::FAKEVMEM_MASK]));
}
if (!performTranslation)
{
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
}
if (segment == 0x0)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && segment == 0x1 && (em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
PanicAlert("Unable to resolve read address %x PC %x", em_address, PC);
return 0;
}
// MMU: Do page table translation
u32 tlb_addr = TranslateAddress<flag>(em_address);
if (tlb_addr == 0)
{
if (flag == FLAG_READ)
GenerateDSIException(em_address, false);
return 0;
}
// Handle loads that cross page boundaries (ewwww)
// The alignment check isn't strictly necessary, but since this is a rare slow path, it provides a
// faster
// (1 instruction on x86) bailout.
if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (tlb_addr == 0 || tlb_addr_next_page == 0)
auto translated_addr = TranslateAddress<flag>(em_address);
if (!translated_addr.Success())
{
if (flag == FLAG_READ)
GenerateDSIException(em_address_next_page, false);
GenerateDSIException(em_address, false);
return 0;
}
T var = 0;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
if ((em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page;
var = (var << 8) | Memory::physical_base[tlb_addr];
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
auto tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (!tlb_addr_next_page.Success())
{
if (flag == FLAG_READ)
GenerateDSIException(em_address_next_page, false);
return 0;
}
T var = 0;
u32 tlb_addr = translated_addr.address;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++)
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page.address;
var = (var << 8) | ReadFromHardware<flag, u8, true>(tlb_addr);
}
return var;
}
return var;
em_address = translated_addr.address;
}
// The easy case!
return bswap(*(const T*)&Memory::physical_base[tlb_addr]);
// TODO: Make sure these are safe for unaligned addresses.
// Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
if ((em_address >> 28) == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
return bswap((*(const T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF]));
}
// In Fake-VMEM mode, we need to map the memory somewhere into
// physical memory for BAT translation to work; we currently use
// [0x7E000000, 0x80000000).
if (Memory::bFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
{
return bswap(*(T*)&Memory::m_pFakeVMEM[em_address & Memory::RAM_MASK]);
}
if (flag == FLAG_READ && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
return EFB_Read(em_address);
else
return (T)Memory::mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
}
if ((em_address & 0xF8000000) == 0x00000000)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
return bswap((*(const T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK]));
}
if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
return bswap((*(const T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF]));
}
PanicAlert("Unable to resolve read address %x PC %x", em_address, PC);
return 0;
}
template <XCheckTLBFlag flag, typename T>
template <XCheckTLBFlag flag, typename T, bool never_translate = false>
__forceinline static void WriteToHardware(u32 em_address, const T data)
{
int segment = em_address >> 28;
// Quick check for an address that can't meet any of the following conditions,
// to speed up the MMU path.
bool performTranslation = UReg_MSR(MSR).DR;
if (!BitSet32(0xCFC)[segment] && performTranslation)
if (!never_translate && UReg_MSR(MSR).DR)
{
// First, let's check for FIFO writes, since they are probably the most common
// reason we end up in this function.
// Note that we must mask the address to correctly emulate certain games;
// Pac-Man World 3 in particular is affected by this.
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0xC8000000)
{
if (em_address < 0xcc000000)
{
// TODO: This only works correctly for 32-bit writes.
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address & 0x0FFFFFFF, data);
return;
}
}
if (segment == 0x0 || segment == 0x8 || segment == 0xC)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
// Handle EXRAM.
// TODO: Is this supposed to be mirrored like main RAM?
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
if (segment == 0xE && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
*(T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
}
if (Memory::bFakeVMEM && performTranslation && (segment == 0x7 || segment == 0x4))
{
// fake VMEM
*(T*)&Memory::m_pFakeVMEM[em_address & Memory::FAKEVMEM_MASK] = bswap(data);
return;
}
if (!performTranslation)
{
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0x0C008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
{
// TODO: This only works correctly for 32-bit writes.
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address, data);
return;
}
}
if (segment == 0x0)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && segment == 0x1 && (em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
PanicAlert("Unable to resolve write address %x PC %x", em_address, PC);
return;
}
// MMU: Do page table translation
u32 tlb_addr = TranslateAddress<flag>(em_address);
if (tlb_addr == 0)
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address, true);
return;
}
// Handle stores that cross page boundaries (ewwww)
if (sizeof(T) > 1 && (em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
T val = bswap(data);
// We need to check both addresses before writing in case there's a DSI.
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
u32 tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (tlb_addr_next_page == 0)
auto translated_addr = TranslateAddress<flag>(em_address);
if (!translated_addr.Success())
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address_next_page, true);
GenerateDSIException(em_address, true);
return;
}
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++, val >>= 8)
if ((em_address & (sizeof(T) - 1)) &&
(em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page;
Memory::physical_base[tlb_addr] = (u8)val;
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
auto tlb_addr_next_page = TranslateAddress<flag>(em_address_next_page);
if (!tlb_addr_next_page.Success())
{
if (flag == FLAG_WRITE)
GenerateDSIException(em_address_next_page, true);
return;
}
T val = bswap(data);
u32 tlb_addr = translated_addr.address;
for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++, val >>= 8)
{
if (addr == em_address_next_page)
tlb_addr = tlb_addr_next_page.address;
WriteToHardware<flag, u8, true>(tlb_addr, (u8)val);
}
return;
}
em_address = translated_addr.address;
}
// TODO: Make sure these are safe for unaligned addresses.
// Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
if ((em_address >> 28 == 0xE) && (em_address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
{
*(T*)&Memory::m_pL1Cache[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
// The easy case!
*(T*)&Memory::physical_base[tlb_addr] = bswap(data);
// In Fake-VMEM mode, we need to map the memory somewhere into
// physical memory for BAT translation to work; we currently use
// [0x7E000000, 0x80000000).
if (Memory::bFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
{
*(T*)&Memory::m_pFakeVMEM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
// Check for a gather pipe write.
// Note that we must mask the address to correctly emulate certain games;
// Pac-Man World 3 in particular is affected by this.
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0x0C008000)
{
switch (sizeof(T))
{
case 1:
GPFifo::Write8((u8)data);
return;
case 2:
GPFifo::Write16((u16)data);
return;
case 4:
GPFifo::Write32((u32)data);
return;
case 8:
GPFifo::Write64((u64)data);
return;
}
}
if (flag == FLAG_WRITE && (em_address & 0xF8000000) == 0x08000000)
{
if (em_address < 0x0c000000)
{
EFB_Write((u32)data, em_address);
return;
}
else
{
Memory::mmio_mapping->Write(em_address, data);
return;
}
}
if ((em_address & 0xF8000000) == 0x00000000)
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
// TODO: Only the first REALRAM_SIZE is supposed to be backed by actual memory.
*(T*)&Memory::m_pRAM[em_address & Memory::RAM_MASK] = bswap(data);
return;
}
if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
*(T*)&Memory::m_pEXRAM[em_address & 0x0FFFFFFF] = bswap(data);
return;
}
PanicAlert("Unable to resolve write address %x PC %x", em_address, PC);
return;
}
// =====================
@ -444,49 +378,28 @@ TryReadInstResult TryReadInstruction(u32 address)
bool from_bat = true;
if (UReg_MSR(MSR).IR)
{
// TODO: Use real translation.
if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
auto tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (!tlb_addr.Success())
{
u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (tlb_addr == 0)
{
return TryReadInstResult{false, false, 0};
}
else
{
address = tlb_addr;
from_bat = false;
}
return TryReadInstResult{false, false, 0};
}
else
{
int segment = address >> 28;
if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
{
address = address & 0x3FFFFFFF;
}
else if (segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
{
address = address & 0x3FFFFFFF;
}
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
{
u32 hex = bswap((*(const u32*)&Memory::m_pFakeVMEM[address & Memory::FAKEVMEM_MASK]));
return TryReadInstResult{true, true, hex};
}
else
{
return TryReadInstResult{false, false, 0};
}
address = tlb_addr.address;
from_bat = tlb_addr.result == TranslateAddressResult::BAT_TRANSLATED;
}
}
u32 hex;
// TODO: Refactor this.
if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
{
hex = bswap(*(const u32*)&Memory::m_pFakeVMEM[address & Memory::FAKEVMEM_MASK]);
}
else
{
if (address & 0xC0000000)
ERROR_LOG(MEMMAP, "Strange program counter with address translation off: 0x%08x", address);
hex = PowerPC::ppcState.iCache.ReadInstruction(address);
}
u32 hex = PowerPC::ppcState.iCache.ReadInstruction(address);
return TryReadInstResult{true, from_bat, hex};
}
@ -697,43 +610,35 @@ bool IsOptimizableRAMAddress(const u32 address)
if (!UReg_MSR(MSR).DR)
return false;
int segment = address >> 28;
return (((segment == 0x8 || segment == 0xC || segment == 0x0) &&
(address & 0x0FFFFFFF) < Memory::REALRAM_SIZE) ||
(Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(address & 0x0FFFFFFF) < Memory::EXRAM_SIZE) ||
(segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE))));
// TODO: This API needs to take an access size
//
// We store whether an access can be optimized to an unchecked access
// in dbat_table.
u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT];
return (bat_result & 2) != 0;
}
bool HostIsRAMAddress(u32 address)
{
// TODO: This needs to be rewritten; it makes incorrect assumptions
// about BATs and page tables.
bool performTranslation = UReg_MSR(MSR).DR;
int segment = address >> 28;
if (performTranslation)
{
if ((segment == 0x8 || segment == 0xC || segment == 0x0) &&
(address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
return true;
else if (Memory::m_pEXRAM && (segment == 0x9 || segment == 0xD) &&
(address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
return true;
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
return true;
else if (segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
return true;
address = TranslateAddress<FLAG_NO_EXCEPTION>(address);
if (!address)
auto translate_address = TranslateAddress<FLAG_NO_EXCEPTION>(address);
if (!translate_address.Success())
return false;
address = translate_address.address;
segment = address >> 28;
}
if (segment == 0x0 && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
return true;
else if (Memory::m_pEXRAM && segment == 0x1 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
return true;
else if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
return true;
else if (segment == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
return true;
return false;
}
@ -809,12 +714,32 @@ void DMA_MemoryToLC(const u32 cacheAddr, const u32 memAddr, const u32 numBlocks)
memcpy(dst, src, 32 * numBlocks);
}
void ClearCacheLine(const u32 address)
void ClearCacheLine(u32 address)
{
// FIXME: does this do the right thing if dcbz is run on hardware memory, e.g.
// the FIFO? Do games even do that? Probably not, but we should try to be correct...
_dbg_assert_(POWERPC, (address & 0x1F) == 0);
if (UReg_MSR(MSR).DR)
{
auto translated_address = TranslateAddress<FLAG_WRITE>(address);
if (translated_address.result == TranslateAddressResult::DIRECT_STORE_SEGMENT)
{
// dcbz to direct store segments is ignored. This is a little
// unintuitive, but this is consistent with both console and the PEM.
// Advance Game Port crashes if we don't emulate this correctly.
return;
}
if (translated_address.result == TranslateAddressResult::PAGE_FAULT)
{
// If translation fails, generate a DSI.
GenerateDSIException(address, true);
return;
}
address = translated_address.address;
}
// TODO: This isn't precisely correct for non-RAM regions, but the difference
// is unlikely to matter.
for (u32 i = 0; i < 32; i += 8)
Write_U64(0, address + i);
WriteToHardware<FLAG_WRITE, u64, true>(address + i, 0);
}
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize)
@ -826,10 +751,13 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize)
if (!UReg_MSR(MSR).DR)
return 0;
if ((address & 0xF0000000) != 0xC0000000)
return 0;
// Translate address
u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT];
if ((bat_result & 1) == 0)
return false;
u32 translated = (bat_result & ~3) | (address & 0x0001FFFF);
unsigned translated = address & 0x0FFFFFFF;
// Check whether the address is an aligned address of an MMIO register.
bool aligned = (translated & ((accessSize >> 3) - 1)) == 0;
if (!aligned || !MMIO::IsMMIOAddress(translated))
return 0;
@ -845,7 +773,14 @@ bool IsOptimizableGatherPipeWrite(u32 address)
if (!UReg_MSR(MSR).DR)
return false;
return address == 0xCC008000;
// Translate address
u32 bat_result = dbat_table[address >> BAT_INDEX_SHIFT];
if ((bat_result & 1) == 0)
return false;
u32 translated = (bat_result & ~3) | (address & 0x0001FFFF);
// Check whether the translated address equals the address in WPAR.
return translated == 0x0C008000;
}
TranslateResult JitCache_TranslateAddress(u32 address)
@ -853,36 +788,15 @@ TranslateResult JitCache_TranslateAddress(u32 address)
if (!UReg_MSR(MSR).IR)
return TranslateResult{true, true, address};
bool from_bat = true;
int segment = address >> 28;
if (SConfig::GetInstance().bMMU && (address & Memory::ADDR_MASK_MEM1))
// TODO: We shouldn't use FLAG_OPCODE if the caller is the debugger.
auto tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (!tlb_addr.Success())
{
u32 tlb_addr = TranslateAddress<FLAG_OPCODE>(address);
if (tlb_addr == 0)
{
return TranslateResult{false, false, 0};
}
else
{
address = tlb_addr;
from_bat = false;
}
}
else
{
if ((segment == 0x8 || segment == 0x0) && (address & 0x0FFFFFFF) < Memory::REALRAM_SIZE)
address = address & 0x3FFFFFFF;
else if (Memory::m_pEXRAM && segment == 0x9 && (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
address = address & 0x3FFFFFFF;
else if (Memory::bFakeVMEM && (segment == 0x7 || segment == 0x4))
address = 0x7E000000 | (address & Memory::FAKEVMEM_MASK);
else
return TranslateResult{false, false, 0};
return TranslateResult{false, false, 0};
}
return TranslateResult{true, from_bat, address};
bool from_bat = tlb_addr.result == TranslateAddressResult::BAT_TRANSLATED;
return TranslateResult{true, from_bat, tlb_addr.address};
}
// *********************************************************************************
@ -1115,7 +1029,8 @@ void InvalidateTLBEntry(u32 address)
}
// Page Address Translation
static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
static __forceinline TranslateAddressResult TranslatePageAddress(const u32 address,
const XCheckTLBFlag flag)
{
// TLB cache
// This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't
@ -1124,10 +1039,19 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
u32 translatedAddress = 0;
TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress);
if (res == TLB_FOUND)
return translatedAddress;
return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED, translatedAddress};
u32 sr = PowerPC::ppcState.sr[EA_SR(address)];
if (sr & 0x80000000)
return TranslateAddressResult{TranslateAddressResult::DIRECT_STORE_SEGMENT, 0};
// TODO: Handle KS/KP segment register flags.
// No-execute segment register flag.
if ((flag == FLAG_OPCODE || flag == FLAG_OPCODE_NO_EXCEPTION) && (sr & 0x10000000))
return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0};
u32 offset = EA_Offset(address); // 12 bit
u32 page_index = EA_PageIndex(address); // 16 bit
u32 VSID = SR_VSID(sr); // 24 bit
@ -1181,19 +1105,136 @@ static __forceinline u32 TranslatePageAddress(const u32 address, const XCheckTLB
if (res != TLB_UPDATE_C)
UpdateTLBEntry(flag, PTE2, address);
return (PTE2.RPN << 12) | offset;
return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED,
(PTE2.RPN << 12) | offset};
}
}
}
return 0;
return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0};
}
// Rebuilds part of a BAT lookup table from four BAT register pairs starting
// at base_spr (e.g. SPR_DBAT0U or SPR_IBAT4U).
//
// Each table entry covers one (1 << BAT_INDEX_SHIFT)-byte chunk of effective
// address space and holds "physical address | flags": bit 0 set means the
// translation is valid; bit 1 set additionally means the target is backed by
// the fastmem arena (RAM/EXRAM/L1 cache/Fake-VMEM), so accesses can be
// performed unchecked.
//
// Fix vs. previous revision: the "BRPN overlaps BL" warning misspelled the
// register name as "BPRN".
static void UpdateBATs(u32* bat_table, u32 base_spr)
{
  // TODO: Separate BATs for MSR.PR==0 and MSR.PR==1
  // TODO: Handle PP/WIMG settings.
  // TODO: Check how hardware reacts to overlapping BATs (including
  // BATs which should cause a DSI).
  // TODO: Check how hardware reacts to invalid BATs (bad mask etc).
  for (int i = 0; i < 4; ++i)
  {
    u32 spr = base_spr + i * 2;
    UReg_BAT_Up batu = PowerPC::ppcState.spr[spr];
    UReg_BAT_Lo batl = PowerPC::ppcState.spr[spr + 1];
    // A BAT with both the supervisor-valid and problem/user-valid bits clear
    // never matches; skip it entirely.
    if (batu.VS == 0 && batu.VP == 0)
      continue;

    if ((batu.BEPI & batu.BL) != 0)
    {
      // With a valid BAT, the simplest way to match is
      // (input & ~BL_mask) == BEPI. For now, assume it's
      // implemented this way for invalid BATs as well.
      WARN_LOG(POWERPC, "Bad BAT setup: BEPI overlaps BL");
      continue;
    }
    if ((batl.BRPN & batu.BL) != 0)
    {
      // With a valid BAT, the simplest way to translate is
      // (input & BL_mask) | BRPN_address. For now, assume it's
      // implemented this way for invalid BATs as well.
      WARN_LOG(POWERPC, "Bad BAT setup: BRPN overlaps BL");
    }
    if (CountSetBits((u32)(batu.BL + 1)) != 1)
    {
      // With a valid BAT, the simplest way of masking is
      // (input & ~BL_mask) for matching and (input & BL_mask) for
      // translation. For now, assume it's implemented this way for
      // invalid BATs as well.
      WARN_LOG(POWERPC, "Bad BAT setup: invalid mask in BL");
    }
    for (u32 j = 0; j < (1 << 11); ++j)
    {
      // Enumerate all bit-patterns which fit within the given mask.
      if ((j & batu.BL) == j)
      {
        // This bit is a little weird: if BRPN & j != 0, we end up with
        // a strange mapping. Need to check on hardware.
        u32 address = (batl.BRPN | j) << BAT_INDEX_SHIFT;

        // The bottom bit is whether the translation is valid; the second
        // bit from the bottom is whether we can use the fastmem arena.
        u32 valid_bit = 0x1;
        if (Memory::bFakeVMEM && ((address & 0xFE000000) == 0x7E000000))
          valid_bit = 0x3;
        else if (address < Memory::REALRAM_SIZE)
          valid_bit = 0x3;
        else if (Memory::m_pEXRAM && (address >> 28) == 0x1 &&
                 (address & 0x0FFFFFFF) < Memory::EXRAM_SIZE)
          valid_bit = 0x3;
        else if ((address >> 28) == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE)))
          valid_bit = 0x3;

        // (BEPI | j) == (BEPI & ~BL) | (j & BL).
        bat_table[batu.BEPI | j] = address | valid_bit;
      }
    }
  }
}
// Installs BAT-table entries covering the 256MB effective segment starting at
// start_addr, mapping it onto the Fake-VMEM physical window
// [0x7E000000, 0x80000000). Only meaningful when Memory::bFakeVMEM is enabled
// (see DBATUpdated/IBATUpdated).
static void UpdateFakeMMUBat(u32* bat_table, u32 start_addr)
{
  for (unsigned i = 0; i < (0x10000000 >> BAT_INDEX_SHIFT); ++i)
  {
    // Map from 0x4XXXXXXX or 0x7XXXXXXX to the range
    // [0x7E000000,0x80000000).
    u32 e_address = i + (start_addr >> BAT_INDEX_SHIFT);
    // The low bits 0x3 mark the entry as both valid and fastmem-backed
    // (same flag encoding as UpdateBATs produces).
    u32 p_address = 0x7E000003 | ((i << BAT_INDEX_SHIFT) & Memory::FAKEVMEM_MASK);
    bat_table[e_address] = p_address;
  }
}
// Rebuilds the data BAT lookup table (dbat_table) from the current DBAT SPRs.
// Must be called whenever a DBAT register changes (or anything else affecting
// the set of active DBATs, e.g. HID4.SBE on Wii) so that data address
// translation and the fastmem mapping stay consistent with the registers.
void DBATUpdated()
{
  memset(dbat_table, 0, sizeof(dbat_table));
  UpdateBATs(dbat_table, SPR_DBAT0U);
  // On Wii, HID4.SBE enables the additional BAT pairs starting at DBAT4.
  bool extended_bats = SConfig::GetInstance().bWii && HID4.SBE;
  if (extended_bats)
    UpdateBATs(dbat_table, SPR_DBAT4U);
  if (Memory::bFakeVMEM)
  {
    // In Fake-MMU mode, insert some extra entries into the BAT tables.
    UpdateFakeMMUBat(dbat_table, 0x40000000);
    UpdateFakeMMUBat(dbat_table, 0x70000000);
  }

  // Redo the fastmem logical-memory mapping to reflect the new BAT layout,
  // and discard JIT code compiled against the old mapping.
  Memory::UpdateLogicalMemory(dbat_table);
  JitInterface::ClearSafe();
}
// Rebuilds the instruction BAT lookup table (ibat_table) from the current
// IBAT SPRs. Must be called whenever an IBAT register changes (or HID4.SBE
// on Wii). Unlike DBATUpdated, this does not remap the fastmem arena; it
// only invalidates JIT code compiled under the old translation.
void IBATUpdated()
{
  memset(ibat_table, 0, sizeof(ibat_table));
  UpdateBATs(ibat_table, SPR_IBAT0U);
  // On Wii, HID4.SBE enables the additional BAT pairs starting at IBAT4.
  bool extended_bats = SConfig::GetInstance().bWii && HID4.SBE;
  if (extended_bats)
    UpdateBATs(ibat_table, SPR_IBAT4U);
  if (Memory::bFakeVMEM)
  {
    // In Fake-MMU mode, insert some extra entries into the BAT tables.
    UpdateFakeMMUBat(ibat_table, 0x40000000);
    UpdateFakeMMUBat(ibat_table, 0x70000000);
  }
  JitInterface::ClearSafe();
}
// Translate effective address using BAT or PAT. Returns 0 if the address cannot be translated.
template <const XCheckTLBFlag flag>
__forceinline u32 TranslateAddress(const u32 address)
TranslateAddressResult TranslateAddress(const u32 address)
{
// TODO: Perform BAT translation. (At the moment, we hardcode an assumed BAT
// configuration, so there's no reason to actually check the registers.)
u32 bat_result = (flag == FLAG_OPCODE ? ibat_table : dbat_table)[address >> BAT_INDEX_SHIFT];
if (bat_result & 1)
{
u32 result_addr = (bat_result & ~3) | (address & 0x0001FFFF);
return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, result_addr};
}
return TranslatePageAddress(address, flag);
}

View File

@ -84,9 +84,6 @@ void PPCSymbolDB::AddKnownSymbol(u32 startAddr, u32 size, const std::string& nam
Symbol* PPCSymbolDB::GetSymbolFromAddr(u32 addr)
{
if (!PowerPC::HostIsRAMAddress(addr))
return nullptr;
XFuncMap::iterator it = functions.find(addr);
if (it != functions.end())
{

View File

@ -265,6 +265,8 @@ void ClearCacheLine(const u32 address); // Zeroes 32 bytes; address should be 3
// TLB functions
void SDRUpdated();
void InvalidateTLBEntry(u32 address);
void DBATUpdated();
void IBATUpdated();
// Result changes based on the BAT registers and MSR.DR. Returns whether
// it's safe to optimize a read or write to this address to an unguarded
@ -280,6 +282,12 @@ struct TranslateResult
u32 address;
};
TranslateResult JitCache_TranslateAddress(u32 address);
// Granularity of the BAT lookup tables: one entry per (1 << 17) = 128KB
// chunk of the 32-bit effective address space.
enum
{
  BAT_INDEX_SHIFT = 17
};
// Flattened representation of the current instruction/data BAT state, one
// u32 per 128KB chunk. Each entry holds the translated physical address in
// its upper bits; bit 0 = translation valid, bit 1 = safe to access through
// the unchecked fastmem path. Kept up to date by IBATUpdated/DBATUpdated.
extern u32 ibat_table[];
extern u32 dbat_table[];
} // namespace
enum CRBits