From b843378636c118d2e88a25e3e0661f07529af1f4 Mon Sep 17 00:00:00 2001 From: LinesPrower Date: Sat, 3 Oct 2009 12:29:27 +0000 Subject: [PATCH] Lots of work on JIT (Implemented "unlimited instruction cache" CPU emulation. Works in JIT and JIT IL). Implemented correct PowerPC instruction cache (IC) emulation in the interpreter mode (including HID0 cache management stuff). Removed the "dcbi->dcbf" hack (again... this time it shouldn't break anything ;) ) Fixes issue 917. Fixes issue 1183. Fixes issue 1190. (those are really need to be verified) Maybe fixes something else. Maybe breaks something. I can't test everything, so please report if you find out something. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4357 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Core.vcproj | 8 + Source/Core/Core/Src/HW/Memmap.cpp | 71 ++++++- Source/Core/Core/Src/HW/Memmap.h | 13 +- Source/Core/Core/Src/HW/MemmapFunctions.cpp | 5 +- Source/Core/Core/Src/PowerPC/Gekko.h | 41 ++++ .../Interpreter/Interpreter_LoadStore.cpp | 27 +-- .../Interpreter_SystemRegisters.cpp | 22 +++ .../Interpreter/Interpreter_Tables.cpp | 2 +- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 12 +- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 11 ++ .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 6 + Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h | 3 +- .../Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp | 12 ++ .../Src/PowerPC/Jit64IL/Jit_LoadStore.cpp | 6 + .../Core/Src/PowerPC/JitCommon/JitCache.cpp | 182 ++++++++++-------- .../Core/Src/PowerPC/JitCommon/JitCache.h | 24 ++- .../Core/Src/PowerPC/JitCommon/Jit_Tables.cpp | 2 +- Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp | 10 +- Source/Core/Core/Src/PowerPC/PPCCache.cpp | 135 +++++++++++++ Source/Core/Core/Src/PowerPC/PPCCache.h | 55 ++++++ Source/Core/Core/Src/PowerPC/PowerPC.cpp | 2 + Source/Core/Core/Src/PowerPC/PowerPC.h | 7 +- Source/Core/Core/Src/SConscript | 1 + Source/Core/Core/Src/State.cpp | 7 + 24 files changed, 538 insertions(+), 126 deletions(-) create mode 100644 Source/Core/Core/Src/PowerPC/PPCCache.cpp create mode 100644 Source/Core/Core/Src/PowerPC/PPCCache.h diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj index 874a69ad8a..f43cf7ecfd 100644 --- a/Source/Core/Core/Core.vcproj +++ b/Source/Core/Core/Core.vcproj @@ -963,6 +963,14 @@ RelativePath=".\Src\PowerPC\PPCAnalyst.h" > + + + + diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index ff223726d8..7b90c582ee 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -528,17 +528,82 @@ bool AreMemoryBreakpointsActivated() u32 Read_Instruction(const u32 em_address) { - UGeckoInstruction inst = ReadUnchecked_U32(em_address); - if (inst.OPCD == 0) - inst.hex = jit.GetBlockCache()->GetOriginalCode(em_address); + UGeckoInstruction inst = ReadUnchecked_U32(em_address); if (inst.OPCD == 1) return HLE::GetOrigInstruction(em_address); else return inst.hex; } +u32 Read_Opcode_JIT(const u32 _Address) +{ +#ifdef JIT_UNLIMITED_ICACHE + //return Memory::ReadUnchecked_U32(_Address); + if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000) + { + PanicAlert("iCacheJIT: Reading Opcode from %x. Please report.", _Address); + return 0; + } + u8* iCache = jit.GetBlockCache()->GetICache(); + u32 addr = _Address & JIT_ICACHE_MASK; + jit.GetBlockCache()->GetICache(); + u32 inst = *(u32*)(iCache + addr); + if (inst == JIT_ICACHE_INVALID_WORD) + { + u32 block_start = addr & ~0x1f; + u8 *pMem = Memory::GetPointer(block_start); + memcpy(iCache + block_start, pMem, 32); + inst = *(u32*)(iCache + addr); + } + inst = Common::swap32(inst); +#else + u32 inst = Memory::ReadUnchecked_U32(_Address); +#endif + if ((inst & 0xfc000000) == 0) + { + inst = jit.GetBlockCache()->GetOriginalFirstOp(inst); + } + //PanicAlert("Read from %x. res = %x. mem=%x", _Address, inst, Memory::Read_U32(_Address)); + return inst; +} +u32 Read_Opcode_JIT_LC(const u32 _Address) +{ +#ifdef JIT_UNLIMITED_ICACHE + //return Memory::ReadUnchecked_U32(_Address); + if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000) + { + PanicAlert("iCacheJIT: Reading Opcode from %x. Please report.", _Address); + return 0; + } + u8* iCache = jit.GetBlockCache()->GetICache(); + u32 addr = _Address & JIT_ICACHE_MASK; + jit.GetBlockCache()->GetICache(); + u32 inst = *(u32*)(iCache + addr); + if (inst == JIT_ICACHE_INVALID_WORD) + inst = Memory::ReadUnchecked_U32(_Address); + else + inst = Common::swap32(inst); +#else + u32 inst = Memory::ReadUnchecked_U32(_Address); +#endif + if ((inst & 0xfc000000) == 0) + { + inst = jit.GetBlockCache()->GetOriginalFirstOp(inst); + } + return inst; +} +// WARNING! No checks! +// We assume that _Address is cached +void Write_Opcode_JIT(const u32 _Address, const u32 _Value) +{ +#ifdef JIT_UNLIMITED_ICACHE + *(u32*)(jit.GetBlockCache()->GetICache() + (_Address & JIT_ICACHE_MASK)) = Common::swap32(_Value); +#else + Memory::WriteUnchecked_U32(_Value, _Address); +#endif +} // ======================================================= diff --git a/Source/Core/Core/Src/HW/Memmap.h b/Source/Core/Core/Src/HW/Memmap.h index b3da643916..720e39359b 100644 --- a/Source/Core/Core/Src/HW/Memmap.h +++ b/Source/Core/Core/Src/HW/Memmap.h @@ -89,8 +89,7 @@ namespace Memory void InitHWMemFuncs(); void InitHWMemFuncsWii(); - - u32 Read_Instruction(const u32 _Address); + bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false); writeFn32 GetHWWriteFun32(const u32 _Address); @@ -105,7 +104,17 @@ namespace Memory #endif } + // used by interpreter to read instructions, uses iCache u32 Read_Opcode(const u32 _Address); + // used by JIT to read instructions, uses iCacheJIT + u32 Read_Opcode_JIT(const u32 _Address); + // used by JIT. uses iCacheJIT. Reads in the "Locked cache" mode + u32 Read_Opcode_JIT_LC(const u32 _Address); + void Write_Opcode_JIT(const u32 _Address, const u32 _Value); + // this is used by Debugger a lot. + // For now, just reads from memory! + u32 Read_Instruction(const u32 _Address); + // For use by emulator diff --git a/Source/Core/Core/Src/HW/MemmapFunctions.cpp b/Source/Core/Core/Src/HW/MemmapFunctions.cpp index 14341522d9..ccbcc3bdcf 100644 --- a/Source/Core/Core/Src/HW/MemmapFunctions.cpp +++ b/Source/Core/Core/Src/HW/MemmapFunctions.cpp @@ -294,9 +294,10 @@ u32 Read_Opcode(const u32 _Address) return 0x00000000; } - u32 _var = 0; + /*u32 _var = 0; ReadFromHardware(_var, _Address, _Address, FLAG_OPCODE); - return _var; + return _var;*/ + return PowerPC::ppcState.iCache.ReadInstruction(_Address); } u8 Read_U8(const u32 _Address) diff --git a/Source/Core/Core/Src/PowerPC/Gekko.h b/Source/Core/Core/Src/PowerPC/Gekko.h index 957c26e208..c1685436ee 100644 --- a/Source/Core/Core/Src/PowerPC/Gekko.h +++ b/Source/Core/Core/Src/PowerPC/Gekko.h @@ -391,6 +391,45 @@ union UReg_FPSCR UReg_FPSCR() { Hex = 0;} }; +// Hardware Implementation-Dependent Register 0 +union UReg_HID0 +{ + struct + { + unsigned NOOPTI : 1; + unsigned : 1; + unsigned BHT : 1; + unsigned ABE : 1; + unsigned : 1; + unsigned BTIC : 1; + unsigned DCFA : 1; + unsigned SGE : 1; + unsigned IFEM : 1; + unsigned SPD : 1; + unsigned DCFI : 1; + unsigned ICFI : 1; + unsigned DLOCK : 1; + unsigned ILOCK : 1; + unsigned DCE : 1; + unsigned ICE : 1; + unsigned NHR : 1; + unsigned : 3; + unsigned DPM : 1; + unsigned SLEEP : 1; + unsigned NAP : 1; + unsigned DOZE : 1; + unsigned PAR : 1; + unsigned ECLK : 1; + unsigned : 1; + unsigned BCLK : 1; + unsigned EBD : 1; + unsigned EBA : 1; + unsigned DBP : 1; + unsigned EMCP : 1; + }; + u32 Hex; +}; + // Hardware Implementation-Dependent Register 2 union UReg_HID2 { @@ -590,6 +629,8 @@ enum SPR_DBAT3L = 542, SPR_DBAT3U = 543, SPR_GQR0 = 912, + SPR_HID0 = 1008, + SPR_HID1 = 1009, SPR_HID2 = 920, SPR_WPAR = 921, SPR_DMAU = 922, diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index dc608f14bf..17b03721c4 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -316,15 +316,7 @@ void dcbtst(UGeckoInstruction _inst) } void dcbz(UGeckoInstruction _inst) -{ - // hack to prevent clearing of memory cached in the CPU instruction cache - // needed to run WiiWare games - // 0x81330c2c - u32 NextOpcode = Memory::Read_U32(PC+4); - if (NextOpcode == 0x7C0400AC) - { - return; - } +{ // HACK but works... we think Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); } @@ -345,19 +337,10 @@ void eieio(UGeckoInstruction _inst) } void icbi(UGeckoInstruction _inst) -{ - u32 address = Helper_Get_EA_X(_inst); - // block size seems to be 0x20 - address &= ~0x1f; - - // this comment is slightly outdated but still relevant: - // Inform the JIT to kill off this area of code NOW - // VERY IMPORTANT when we start linking blocks - // There are a TON of these so hopefully we can make this mechanism - // fast in the JIT - // ector said that this isn't needed anymore, and that making - // a jit version of this instruction would be easy anyway - //jit.GetBlockCache()->InvalidateCodeRange(address, 0x20); +{ + u32 address = Helper_Get_EA_X(_inst); + PowerPC::ppcState.iCache.Invalidate(address); + jit.GetBlockCache()->InvalidateICache(address); } void lbzux(UGeckoInstruction _inst) diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index a439b28270..272da1bf3e 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -318,6 +318,28 @@ void mtspr(UGeckoInstruction _inst) TU = m_GPR[_inst.RD]; break; + case SPR_HID0: // HID0 + { + UReg_HID0 old_hid0; + old_hid0.Hex = oldValue; + if (HID0.ICE != old_hid0.ICE) + { + NOTICE_LOG(POWERPC, "Instruction Cache Enable (HID0.ICE) = %d", (int)HID0.ICE); + } + if (HID0.ILOCK != old_hid0.ILOCK) + { + NOTICE_LOG(POWERPC, "Instruction Cache Lock (HID0.ILOCK) = %d", (int)HID0.ILOCK); + } + if (HID0.ICFI) + { + HID0.ICFI = 0; + NOTICE_LOG(POWERPC, "Flush Instruction Cache! ICE=%d", (int)HID0.ICE); + // this is rather slow + // most games do it only once during initialization + PowerPC::ppcState.iCache.Reset(); + } + } + break; case SPR_HID2: // HID2 { UReg_HID2 old_hid2; diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp index 3a6a9b04e3..b465984239 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -280,7 +280,7 @@ static GekkoOPTemplate table31[] = {4, Interpreter::tw, {"tw", OPTYPE_SYSTEM, 0, 1}}, {598, Interpreter::sync, {"sync", OPTYPE_SYSTEM, 0, 2}}, - {982, Interpreter::icbi, {"icbi", OPTYPE_SYSTEM, 0, 3}}, + {982, Interpreter::icbi, {"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, // Unused instructions on GC {310, Interpreter::eciwx, {"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 084fb780bf..017332014f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -27,15 +27,10 @@ // * A flush simply does a conditional write to the appropriate CRx. // * If flag available, branch code can become absolutely trivial. - - // Settings // ---------- #define JIT_OFF_OPTIONS // Compile with JIT off options - - - // Include // ---------- #if JITTEST @@ -55,9 +50,6 @@ #include #endif - - - // Declarations and definitions // ---------- @@ -308,9 +300,11 @@ public: void lmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst); + + void icbi(UGeckoInstruction inst); }; extern Jit64 jit; #endif // _JIT_H -#endif // JITTEST +#endif // JITTEST \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 00b5225dde..aac2095122 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -86,12 +86,23 @@ void AsmRoutineManager::Generate() dispatcherNoCheck = GetCodePtr(); MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); dispatcherPcInEAX = GetCodePtr(); + +#ifdef JIT_UNLIMITED_ICACHE + AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); +#ifdef _M_IX86 + MOV(32, R(EAX), MDisp(EAX, (u32)jit.GetBlockCache()->GetICache())); +#else + MOV(64, R(RSI), Imm64((u64)jit.GetBlockCache()->GetICache())); + MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); +#endif +#else #ifdef _M_IX86 AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); MOV(32, R(EBX), Imm32((u32)Memory::base)); MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0)); #else MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0)); +#endif #endif TEST(32, R(EAX), Imm32(0xFC)); FixupBranch notfound = J_CC(CC_NZ); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index 958fddf1eb..43a3bbe96a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -509,3 +509,9 @@ void Jit64::stmw(UGeckoInstruction inst) gpr.UnlockAllX(); #endif } + +void Jit64::icbi(UGeckoInstruction inst) +{ + Default(inst); + WriteExit(js.compilerPC + 4, 0); +} \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h index fbe6e38f7c..e0838321a0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h @@ -294,6 +294,8 @@ public: void lmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst); + + void icbi(UGeckoInstruction inst); }; extern Jit64 jit; @@ -303,4 +305,3 @@ void Jit(u32 em_address); void ProfiledReJit(); #endif - diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp index f63d4f5af5..cccd2c2707 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp @@ -88,6 +88,16 @@ void AsmRoutineManager::Generate() dispatcherNoCheck = GetCodePtr(); MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); dispatcherPcInEAX = GetCodePtr(); + +#ifdef JIT_UNLIMITED_ICACHE + AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); +#ifdef _M_IX86 + MOV(32, R(EAX), MDisp(EAX, (u32)jit.GetBlockCache()->GetICache())); +#else + MOV(64, R(RSI), Imm64((u64)jit.GetBlockCache()->GetICache())); + MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); +#endif +#else #ifdef _M_IX86 AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); MOV(32, R(EBX), Imm32((u32)Memory::base)); @@ -95,6 +105,8 @@ void AsmRoutineManager::Generate() #else MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0)); #endif +#endif + TEST(32, R(EAX), Imm32(0xFC)); FixupBranch notfound = J_CC(CC_NZ); BSWAP(32, EAX); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp index 877f2aa450..aac8516327 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp @@ -196,3 +196,9 @@ void Jit64::stmw(UGeckoInstruction inst) addr = ibuild.EmitAdd(addr, ibuild.EmitIntConst(4)); } } + +void Jit64::icbi(UGeckoInstruction inst) +{ + Default(inst); + ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); +} \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp index cb67441921..13f0307fc2 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp @@ -60,7 +60,6 @@ using namespace Gen; #define INVALID_EXIT 0xFFFFFFFF - bool JitBlock::ContainsAddress(u32 em_address) { // WARNING - THIS DOES NOT WORK WITH INLINING ENABLED. @@ -86,7 +85,21 @@ bool JitBlock::ContainsAddress(u32 em_address) #endif blocks = new JitBlock[MAX_NUM_BLOCKS]; blockCodePointers = new const u8*[MAX_NUM_BLOCKS]; - +#ifdef JIT_UNLIMITED_ICACHE + if (iCache == 0) + { + iCache = new u8[JIT_ICACHE_SIZE]; + } + else + { + PanicAlert("JitBlockCache::Init() - iCache is already initialized"); + } + if (iCache == 0) + { + PanicAlert("JitBlockCache::Init() - unable to allocate iCache"); + } + memset(iCache, JIT_ICACHE_INVALID_BYTE, JIT_ICACHE_SIZE); +#endif Clear(); } @@ -94,6 +107,11 @@ bool JitBlock::ContainsAddress(u32 em_address) { delete [] blocks; delete [] blockCodePointers; +#ifdef JIT_UNLIMITED_ICACHE + if (iCache != 0) + delete [] iCache; + iCache = 0; +#endif blocks = 0; blockCodePointers = 0; num_blocks = 0; @@ -105,19 +123,19 @@ bool JitBlock::ContainsAddress(u32 em_address) // This clears the JIT cache. It's called from JitCache.cpp when the JIT cache // is full and when saving and loading states. void JitBlockCache::Clear() - { + { Core::DisplayMessage("Cleared code cache.", 3000); - // Is destroying the blocks really necessary? for (int i = 0; i < num_blocks; i++) { DestroyBlock(i, false); } links_to.clear(); + block_map.clear(); num_blocks = 0; - memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS); + memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS); } - void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) + /*void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) { for (int i = 0; i < num_blocks; i++) { @@ -126,7 +144,7 @@ bool JitBlock::ContainsAddress(u32 em_address) DestroyBlock(i, false); } } - } + }*/ void JitBlockCache::Reset() { @@ -161,7 +179,6 @@ bool JitBlock::ContainsAddress(u32 em_address) JitBlock &b = blocks[num_blocks]; b.invalid = false; b.originalAddress = em_address; - b.originalFirstOpcode = Memory::ReadFast32(em_address); b.exitAddress[0] = INVALID_EXIT; b.exitAddress[1] = INVALID_EXIT; b.exitPtrs[0] = 0; @@ -177,7 +194,9 @@ bool JitBlock::ContainsAddress(u32 em_address) { blockCodePointers[block_num] = code_ptr; JitBlock &b = blocks[block_num]; - Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | block_num, blocks[block_num].originalAddress); + b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress); + Memory::Write_Opcode_JIT(b.originalAddress, (JIT_OPCODE << 26) | block_num); + block_map[std::make_pair(b.originalAddress + b.originalSize - 1, b.originalAddress)] = block_num; if (block_link) { for (int i = 0; i < 2; i++) @@ -204,51 +223,52 @@ bool JitBlock::ContainsAddress(u32 em_address) return blockCodePointers; } +#ifdef JIT_UNLIMITED_ICACHE + u8 *JitBlockCache::GetICache() + { + return iCache; + } +#endif + int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr) { if (!blocks) + return -1; +#ifdef JIT_UNLIMITED_ICACHE + u32 inst = *(u32*)(iCache + (addr & JIT_ICACHE_MASK)); + inst = Common::swap32(inst); +#else + u32 inst = Memory::ReadFast32(addr); +#endif + if (inst & 0xfc000000) // definitely not a JIT block return -1; - u32 code = Memory::ReadFast32(addr); - if ((code >> 26) == JIT_OPCODE) - { - // Jitted code. - unsigned int block = code & 0x03FFFFFF; - if (block >= (unsigned int)num_blocks) { - return -1; - } - - if (blocks[block].originalAddress != addr) - { - //_assert_msg_(DYNA_REC, 0, "GetBlockFromAddress %08x - No match - This is BAD", addr); - return -1; - } - return block; - } - else - { + if (inst >= num_blocks) return -1; - } + if (blocks[inst].originalAddress != addr) + return -1; + return inst; } -void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers) -{ - for (int i = 0; i < num_blocks; i++) - if (blocks[i].ContainsAddress(em_address)) - block_numbers->push_back(i); -} - - u32 JitBlockCache::GetOriginalCode(u32 address) + void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers) { - int num = GetBlockNumberFromStartAddress(address); - if (num == -1) - return Memory::ReadUnchecked_U32(address); - else - return blocks[num].originalFirstOpcode; - } + for (int i = 0; i < num_blocks; i++) + if (blocks[i].ContainsAddress(em_address)) + block_numbers->push_back(i); + } + + u32 JitBlockCache::GetOriginalFirstOp(u32 block_num) + { + if (block_num >= num_blocks) + { + //PanicAlert("JitBlockCache::GetOriginalFirstOp - block_num = %u is out of range", block_num); + return block_num; + } + return blocks[block_num].originalFirstOpcode; + } CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber) - { - return (CompiledCode)blockCodePointers[blockNumber]; + { + return (CompiledCode)blocks[blockNumber].normalEntry; } //Block linker @@ -301,52 +321,64 @@ void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector void JitBlockCache::DestroyBlock(int blocknum, bool invalidate) { - u32 codebytes = (JIT_OPCODE << 26) | blocknum; //generate from i + if (blocknum < 0 || blocknum >= num_blocks) + { + PanicAlert("DestroyBlock: Invalid block number %d", blocknum); + return; + } JitBlock &b = blocks[blocknum]; - b.invalid = 1; - if (codebytes == Memory::ReadFast32(b.originalAddress)) + if (b.invalid) { - //nobody has changed it, good + if (invalidate) + PanicAlert("Invalidating invalid block %d", blocknum); + return; + } + b.invalid = true; +#ifdef JIT_UNLIMITED_ICACHE + Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode); +#else + if (Memory::ReadFast32(b.originalAddress) == blocknum) Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress); - } - else if (!invalidate) - { - //PanicAlert("Detected code overwrite"); - //else, we may be in trouble, since we apparently know of this block but it's been - //overwritten. We should have thrown it out before, on instruction cache invalidate or something. - //Not ne cessarily bad though , if a game has simply thrown away a lot of code and is now using the space - //for something else, then it's fine. - DEBUG_LOG(MASTER_LOG, "WARNING - ClearCache detected code overwrite @ %08x", blocks[blocknum].originalAddress); - } +#endif // We don't unlink blocks, we just send anyone who tries to run them back to the dispatcher. - // Not entirely ideal, but .. pretty good. - - // TODO - make sure that the below stuff really is safe. - + // Not entirely ideal, but .. pretty good. // Spurious entrances from previously linked blocks can only come through checkedEntry XEmitter emit((u8 *)b.checkedEntry); emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.JMP(asm_routines.dispatcher, true); - + // this is not needed really + /* emit.SetCodePtr((u8 *)blockCodePointers[blocknum]); emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.JMP(asm_routines.dispatcher, true); + */ } - void JitBlockCache::InvalidateCodeRange(u32 address, u32 length) - { - if (!jit.jo.enableBlocklink) - return; - return; - //This is slow but should be safe (zelda needs it for block linking) - for (int i = 0; i < num_blocks; i++) + void JitBlockCache::InvalidateICache(u32 address) + { + address &= ~0x1f; + // destroy JIT blocks + // !! this works correctly under assumption that any two overlapping blocks end at the same address + std::map, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it; + while (it2 != block_map.end() && it2->first.second < address + 0x20) { - if (RangeIntersect(blocks[i].originalAddress, blocks[i].originalAddress + blocks[i].originalSize, - address, address + length)) - { - DestroyBlock(i, true); - } + DestroyBlock(it2->second, true); + it2++; } + if (it1 != it2) + { + block_map.erase(it1, it2); + } + +#ifdef JIT_UNLIMITED_ICACHE + // invalidate iCache + if ((address & ~JIT_ICACHE_MASK) != 0x80000000 && (address & ~JIT_ICACHE_MASK) != 0x00000000) + { + return; + } + u32 cacheaddr = address & JIT_ICACHE_MASK; + memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); +#endif } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h index 4f36025078..db2f9c8f49 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h @@ -28,6 +28,17 @@ #include #endif +// emulate CPU with unlimited instruction cache +// the only way to invalidate a region is the "icbi" instruction +#define JIT_UNLIMITED_ICACHE + +#define JIT_ICACHE_SIZE 0x2000000 +#define JIT_ICACHE_MASK 0x1ffffff +// this corresponds to opcode 5 which is invalid in PowerPC +#define JIT_ICACHE_INVALID_BYTE 0x14 +#define JIT_ICACHE_INVALID_WORD 0x14141414 + + enum BlockFlag { BLOCK_USE_GQR0 = 0x1, BLOCK_USE_GQR1 = 0x2, BLOCK_USE_GQR2 = 0x4, BLOCK_USE_GQR3 = 0x8, @@ -71,6 +82,10 @@ class JitBlockCache JitBlock *blocks; int num_blocks; std::multimap links_to; + std::map, u32> block_map; // (end_addr, start_addr) -> number +#ifdef JIT_UNLIMITED_ICACHE + u8 *iCache; +#endif int MAX_NUM_BLOCKS; bool RangeIntersect(int s1, int e1, int s2, int e2) const; @@ -94,6 +109,9 @@ public: JitBlock *GetBlock(int block_num); int GetNumBlocks() const; const u8 **GetCodePointers(); +#ifdef JIT_UNLIMITED_ICACHE + u8 *GetICache(); +#endif // Fast way to get a block. Only works on the first ppc instruction of a block. int GetBlockNumberFromStartAddress(u32 em_address); @@ -104,15 +122,15 @@ public: // This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime. void GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers); - u32 GetOriginalCode(u32 address); + u32 GetOriginalFirstOp(u32 block_num); CompiledCode GetCompiledCodeFromBlock(int blockNumber); // DOES NOT WORK CORRECTLY WITH INLINING - void InvalidateCodeRange(u32 em_address, u32 length); + void InvalidateICache(u32 em_address); void DestroyBlock(int blocknum, bool invalidate); // Not currently used - void DestroyBlocksWithFlag(BlockFlag death_flag); + //void DestroyBlocksWithFlag(BlockFlag death_flag); }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Tables.cpp index bcdd8bdbf9..d5fa033081 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Tables.cpp @@ -319,7 +319,7 @@ static GekkoOPTemplate table31[] = {4, &Jit64::Default}, //"tw", OPTYPE_SYSTEM, 0, 1}}, {598, &Jit64::DoNothing}, //"sync", OPTYPE_SYSTEM, 0, 2}}, - {982, &Jit64::Default}, //"icbi", OPTYPE_SYSTEM, 0, 3}}, + {982, &Jit64::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, // Unused instructions on GC {310, &Jit64::Default}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 436a0e2bd0..4c27a1820a 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -287,8 +287,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer) { memset(st, 0, sizeof(st)); - - UGeckoInstruction previnst = Memory::Read_Instruction(address - 4); + UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4); if (previnst.hex == 0x4e800020) st->isFirstBlockOfFunction = true; @@ -309,10 +308,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo { memset(&code[i], 0, sizeof(CodeOp)); code[i].address = address; - UGeckoInstruction inst = Memory::Read_Instruction(code[i].address); - UGeckoInstruction untouched_op = Memory::ReadUnchecked_U32(code[i].address); - if (untouched_op.OPCD == 1) // Do handle HLE instructions. - inst = untouched_op; + + UGeckoInstruction inst = Memory::Read_Opcode_JIT(code[i].address); + _assert_msg_(POWERPC, inst.hex != 0, "Zero Op - Error flattening %08x op %08x", address + i*4, inst.hex); code[i].inst = inst; code[i].branchTo = -1; diff --git a/Source/Core/Core/Src/PowerPC/PPCCache.cpp b/Source/Core/Core/Src/PowerPC/PPCCache.cpp new file mode 100644 index 0000000000..e0382c1081 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/PPCCache.cpp @@ -0,0 +1,135 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "PPCCache.h" +#include "../HW/Memmap.h" +#include "PowerPC.h" + +namespace PowerPC +{ + + u32 plru_mask[8] = {11,11,19,19,37,37,69,69}; + u32 plru_value[8] = {11,3,17,1,36,4,64,0}; + + InstructionCache::InstructionCache() + { + for (u32 m = 0; m < 0xff; m++) + { + u32 w = 0; + while (m & (1<> 5) & 0x7f; +#ifdef FAST_ICACHE + for (int i = 0; i < 8; i++) + if (valid[set] & (1<> 5) & 0x7f; + u32 tag = addr >> 12; +#ifdef FAST_ICACHE + u32 t = lookup_table[(addr>>5) & 0xfffff]; +#else + u32 t = 0xff; + for (u32 i = 0; i < 8; i++) + if (tags[set][i] == tag && (valid[set] & (1<>5) & 0xfffff] = t; +#endif + tags[set][t] = tag; + valid[set] |= 1<>2)&7]); + } + +} \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/PPCCache.h b/Source/Core/Core/Src/PowerPC/PPCCache.h new file mode 100644 index 0000000000..5732643efa --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/PPCCache.h @@ -0,0 +1,55 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _PPCCACHE_H +#define _PPCCACHE_H + +#include "Common.h" + +#define FAST_ICACHE + +namespace PowerPC +{ + + const u32 ICACHE_SETS = 128; + const u32 ICACHE_WAYS = 8; + // size of an instruction cache block in words + const u32 ICACHE_BLOCK_SIZE = 8; + + struct InstructionCache + { + u32 data[ICACHE_SETS][ICACHE_WAYS][ICACHE_BLOCK_SIZE]; + u32 tags[ICACHE_SETS][ICACHE_WAYS]; + u32 plru[ICACHE_SETS]; + u32 valid[ICACHE_SETS]; + + u32 way_from_valid[255]; + u32 way_from_plru[128]; + +#ifdef FAST_ICACHE + u8 lookup_table[1<<20]; +#endif + + InstructionCache(); + void Reset(); + u32 ReadInstruction(u32 addr); + void Invalidate(u32 addr); + }; + +} + +#endif \ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 1706f68e97..cdcb680b9b 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -127,6 +127,8 @@ void Init() // ... but start as interpreter by default. mode = MODE_INTERPRETER; state = CPU_STEPPING; + + ppcState.iCache.Reset(); } void Shutdown() diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index 1b579b0fb1..44f0b966a5 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -22,6 +22,7 @@ #include "Gekko.h" #include "BreakPoints.h" #include "../Debugger/PPCDebugInterface.h" +#include "PPCCache.h" class PointerWrap; @@ -64,6 +65,9 @@ struct GC_ALIGNED64(PowerPCState) // special purpose registers - controlls quantizers, DMA, and lots of other misc extensions. // also for power management, but we don't care about that. u32 spr[1024]; + + InstructionCache iCache; + // JIT-mode instruction cache. Managed by JitCache }; enum CPUState @@ -101,6 +105,7 @@ void OnIdle(u32 _uThreadAddr); void OnIdleIL(); // Easy register access macros. +#define HID0 ((UReg_HID0&)PowerPC::ppcState.spr[SPR_HID0]) #define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2]) #define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU]) #define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL]) @@ -194,4 +199,4 @@ inline void SetXER_SO(int value) { void UpdateFPRF(double dvalue); -#endif +#endif \ No newline at end of file diff --git a/Source/Core/Core/Src/SConscript b/Source/Core/Core/Src/SConscript index 13dd14dc3e..3a677bd3c7 100644 --- a/Source/Core/Core/Src/SConscript +++ b/Source/Core/Core/Src/SConscript @@ -72,6 +72,7 @@ files = ["ActionReplay.cpp", "PowerPC/PowerPC.cpp", "PowerPC/PPCAnalyst.cpp", "PowerPC/PPCTables.cpp", + "PowerPC/PPCCache.cpp", "PowerPC/Profiler.cpp", "PowerPC/SignatureDB.cpp", "PowerPC/PPCSymbolDB.cpp", diff --git a/Source/Core/Core/Src/State.cpp b/Source/Core/Core/Src/State.cpp index 917dc40282..dd2816b1e8 100644 --- a/Source/Core/Core/Src/State.cpp +++ b/Source/Core/Core/Src/State.cpp @@ -24,7 +24,11 @@ #include "CoreTiming.h" #include "HW/HW.h" #include "PowerPC/PowerPC.h" +#ifdef JITTEST +#include "PowerPC/Jit64IL/Jit.h" +#else #include "PowerPC/Jit64/Jit.h" +#endif #include "PluginManager.h" @@ -91,6 +95,9 @@ void DoState(PointerWrap &p) PowerPC::DoState(p); HW::DoState(p); CoreTiming::DoState(p); +#ifdef JIT_UNLIMITED_ICACHE + p.DoVoid(jit.GetBlockCache()->GetICache(), JIT_ICACHE_SIZE); +#endif } void LoadBufferStateCallback(u64 userdata, int cyclesLate)