Lots of work on the JIT: implemented "unlimited instruction cache" CPU emulation (works in both JIT and JIT IL). Implemented correct PowerPC instruction cache (IC) emulation in interpreter mode, including the HID0 cache management bits. Removed the "dcbi->dcbf" hack (again... this time it shouldn't break anything ;) )

Fixes issue 917. Fixes issue 1183. Fixes issue 1190. (These fixes still need to be verified.)
Maybe fixes something else. Maybe breaks something. I can't test everything, so please report anything you find.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4357 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
LinesPrower 2009-10-03 12:29:27 +00:00
parent 98e1dd6dc4
commit b843378636
24 changed files with 538 additions and 126 deletions

View File

@ -963,6 +963,14 @@
RelativePath=".\Src\PowerPC\PPCAnalyst.h" RelativePath=".\Src\PowerPC\PPCAnalyst.h"
> >
</File> </File>
<File
RelativePath=".\Src\PowerPC\PPCCache.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\PPCCache.h"
>
</File>
<File <File
RelativePath=".\Src\PowerPC\PPCSymbolDB.cpp" RelativePath=".\Src\PowerPC\PPCSymbolDB.cpp"
> >

View File

@ -529,16 +529,81 @@ bool AreMemoryBreakpointsActivated()
u32 Read_Instruction(const u32 em_address) u32 Read_Instruction(const u32 em_address)
{ {
UGeckoInstruction inst = ReadUnchecked_U32(em_address); UGeckoInstruction inst = ReadUnchecked_U32(em_address);
if (inst.OPCD == 0)
inst.hex = jit.GetBlockCache()->GetOriginalCode(em_address);
if (inst.OPCD == 1) if (inst.OPCD == 1)
return HLE::GetOrigInstruction(em_address); return HLE::GetOrigInstruction(em_address);
else else
return inst.hex; return inst.hex;
} }
// Reads the instruction at _Address on behalf of the JIT.
//
// With JIT_UNLIMITED_ICACHE the word comes from the JIT block cache's iCache buffer.
// If the cached word still equals JIT_ICACHE_INVALID_WORD, the enclosing 32-byte line
// is first copied in from emulated memory. Without JIT_UNLIMITED_ICACHE the word is
// read straight from memory.
//
// A word whose primary opcode (top 6 bits) is zero is passed to GetOriginalFirstOp(),
// which maps it back to the original first opcode of a block.
//
// Returns 0 after a PanicAlert when _Address is outside the two accepted address
// windows, or when no host pointer is available for the line to be loaded.
u32 Read_Opcode_JIT(const u32 _Address)
{
#ifdef JIT_UNLIMITED_ICACHE
	//return Memory::ReadUnchecked_U32(_Address);
	if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000)
	{
		PanicAlert("iCacheJIT: Reading Opcode from %x. Please report.", _Address);
		return 0;
	}
	u8* iCache = jit.GetBlockCache()->GetICache();
	u32 addr = _Address & JIT_ICACHE_MASK;
	u32 inst = *(u32*)(iCache + addr);
	if (inst == JIT_ICACHE_INVALID_WORD)
	{
		// Fill the whole 32-byte line that contains the requested word.
		u32 block_start = addr & ~0x1f;
		u8 *pMem = Memory::GetPointer(block_start);
		// NOTE(review): assumes GetPointer yields a null pointer for addresses it
		// cannot map - confirm. Copying from a null source would be undefined.
		if (!pMem)
		{
			PanicAlert("iCacheJIT: Reading Opcode from %x. Please report.", _Address);
			return 0;
		}
		memcpy(iCache + block_start, pMem, 32);
		inst = *(u32*)(iCache + addr);
	}
	// The buffer holds raw copies of emulated memory, so swap before decoding.
	inst = Common::swap32(inst);
#else
	u32 inst = Memory::ReadUnchecked_U32(_Address);
#endif
	if ((inst & 0xfc000000) == 0)
	{
		inst = jit.GetBlockCache()->GetOriginalFirstOp(inst);
	}
	//PanicAlert("Read from %x. res = %x. mem=%x", _Address, inst, Memory::Read_U32(_Address));
	return inst;
}
// Variant of Read_Opcode_JIT that never fills the iCache buffer.
//
// With JIT_UNLIMITED_ICACHE the word is taken from the iCache buffer when it is valid.
// When the cached word equals JIT_ICACHE_INVALID_WORD the instruction is read directly
// from memory and the buffer is left untouched. Without JIT_UNLIMITED_ICACHE the word
// is always read from memory.
//
// A word whose primary opcode (top 6 bits) is zero is passed to GetOriginalFirstOp().
// Returns 0 after a PanicAlert when _Address is outside the two accepted windows.
u32 Read_Opcode_JIT_LC(const u32 _Address)
{
#ifdef JIT_UNLIMITED_ICACHE
	//return Memory::ReadUnchecked_U32(_Address);
	if ((_Address & ~JIT_ICACHE_MASK) != 0x80000000 && (_Address & ~JIT_ICACHE_MASK) != 0x00000000)
	{
		PanicAlert("iCacheJIT: Reading Opcode from %x. Please report.", _Address);
		return 0;
	}
	u8* iCache = jit.GetBlockCache()->GetICache();
	u32 addr = _Address & JIT_ICACHE_MASK;
	u32 inst = *(u32*)(iCache + addr);
	if (inst == JIT_ICACHE_INVALID_WORD)
		inst = Memory::ReadUnchecked_U32(_Address); // not cached: bypass the buffer
	else
		inst = Common::swap32(inst); // cached words are stored byte-swapped
#else
	u32 inst = Memory::ReadUnchecked_U32(_Address);
#endif
	if ((inst & 0xfc000000) == 0)
	{
		inst = jit.GetBlockCache()->GetOriginalFirstOp(inst);
	}
	return inst;
}
// WARNING! Performs no validation at all.
// The caller must guarantee that _Address is already cached.
// Stores _Value as the opcode at _Address: into the JIT iCache buffer when
// JIT_UNLIMITED_ICACHE is defined, otherwise directly into emulated memory.
void Write_Opcode_JIT(const u32 _Address, const u32 _Value)
{
#ifdef JIT_UNLIMITED_ICACHE
	u8 *const cacheBase = jit.GetBlockCache()->GetICache();
	u32 *const slot = (u32*)(cacheBase + (_Address & JIT_ICACHE_MASK));
	// Words in the buffer are kept byte-swapped relative to the decoded value.
	*slot = Common::swap32(_Value);
#else
	Memory::WriteUnchecked_U32(_Value, _Address);
#endif
}
// ======================================================= // =======================================================

View File

@ -90,7 +90,6 @@ namespace Memory
void InitHWMemFuncs(); void InitHWMemFuncs();
void InitHWMemFuncsWii(); void InitHWMemFuncsWii();
u32 Read_Instruction(const u32 _Address);
bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false); bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false);
writeFn32 GetHWWriteFun32(const u32 _Address); writeFn32 GetHWWriteFun32(const u32 _Address);
@ -105,7 +104,17 @@ namespace Memory
#endif #endif
} }
// used by interpreter to read instructions, uses iCache
u32 Read_Opcode(const u32 _Address); u32 Read_Opcode(const u32 _Address);
// used by JIT to read instructions, uses iCacheJIT
u32 Read_Opcode_JIT(const u32 _Address);
// used by JIT. uses iCacheJIT. Reads in the "Locked cache" mode
u32 Read_Opcode_JIT_LC(const u32 _Address);
void Write_Opcode_JIT(const u32 _Address, const u32 _Value);
// this is used by Debugger a lot.
// For now, just reads from memory!
u32 Read_Instruction(const u32 _Address);
// For use by emulator // For use by emulator

View File

@ -294,9 +294,10 @@ u32 Read_Opcode(const u32 _Address)
return 0x00000000; return 0x00000000;
} }
u32 _var = 0; /*u32 _var = 0;
ReadFromHardware<u32>(_var, _Address, _Address, FLAG_OPCODE); ReadFromHardware<u32>(_var, _Address, _Address, FLAG_OPCODE);
return _var; return _var;*/
return PowerPC::ppcState.iCache.ReadInstruction(_Address);
} }
u8 Read_U8(const u32 _Address) u8 Read_U8(const u32 _Address)

View File

@ -391,6 +391,45 @@ union UReg_FPSCR
UReg_FPSCR() { Hex = 0;} UReg_FPSCR() { Hex = 0;}
}; };
// Hardware Implementation-Dependent Register 0
// Overlays 32 one-bit (and one three-bit) fields on the raw register value in Hex.
// The declared field widths add up to exactly 32 bits; unnamed fields are padding.
// NOTE(review): bit-field allocation order is implementation-defined in C++; this
// layout presumably relies on the first declared field landing in the least
// significant bit - confirm for every supported compiler.
// Only ICE, ILOCK and ICFI are interpreted by the mtspr handler in this change; the
// remaining names presumably follow the CPU manual's HID0 mnemonics - verify there.
union UReg_HID0
{
struct
{
unsigned NOOPTI : 1;
unsigned : 1;
unsigned BHT : 1;
unsigned ABE : 1;
unsigned : 1;
unsigned BTIC : 1;
unsigned DCFA : 1;
unsigned SGE : 1;
unsigned IFEM : 1;
unsigned SPD : 1;
unsigned DCFI : 1;
unsigned ICFI : 1; // instruction cache flash invalidate: handler resets the iCache and clears the bit
unsigned DLOCK : 1;
unsigned ILOCK : 1; // instruction cache lock
unsigned DCE : 1;
unsigned ICE : 1; // instruction cache enable
unsigned NHR : 1;
unsigned : 3;
unsigned DPM : 1;
unsigned SLEEP : 1;
unsigned NAP : 1;
unsigned DOZE : 1;
unsigned PAR : 1;
unsigned ECLK : 1;
unsigned : 1;
unsigned BCLK : 1;
unsigned EBD : 1;
unsigned EBA : 1;
unsigned DBP : 1;
unsigned EMCP : 1;
};
// Raw 32-bit register value.
u32 Hex;
};
// Hardware Implementation-Dependent Register 2 // Hardware Implementation-Dependent Register 2
union UReg_HID2 union UReg_HID2
{ {
@ -590,6 +629,8 @@ enum
SPR_DBAT3L = 542, SPR_DBAT3L = 542,
SPR_DBAT3U = 543, SPR_DBAT3U = 543,
SPR_GQR0 = 912, SPR_GQR0 = 912,
SPR_HID0 = 1008,
SPR_HID1 = 1009,
SPR_HID2 = 920, SPR_HID2 = 920,
SPR_WPAR = 921, SPR_WPAR = 921,
SPR_DMAU = 922, SPR_DMAU = 922,

View File

@ -317,14 +317,6 @@ void dcbtst(UGeckoInstruction _inst)
void dcbz(UGeckoInstruction _inst) void dcbz(UGeckoInstruction _inst)
{ {
// hack to prevent clearing of memory cached in the CPU instruction cache
// needed to run WiiWare games
// 0x81330c2c
u32 NextOpcode = Memory::Read_U32(PC+4);
if (NextOpcode == 0x7C0400AC)
{
return;
}
// HACK but works... we think // HACK but works... we think
Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32);
} }
@ -347,17 +339,8 @@ void eieio(UGeckoInstruction _inst)
void icbi(UGeckoInstruction _inst) void icbi(UGeckoInstruction _inst)
{ {
u32 address = Helper_Get_EA_X(_inst); u32 address = Helper_Get_EA_X(_inst);
// block size seems to be 0x20 PowerPC::ppcState.iCache.Invalidate(address);
address &= ~0x1f; jit.GetBlockCache()->InvalidateICache(address);
// this comment is slightly outdated but still relevant:
// Inform the JIT to kill off this area of code NOW
// VERY IMPORTANT when we start linking blocks
// There are a TON of these so hopefully we can make this mechanism
// fast in the JIT
// ector said that this isn't needed anymore, and that making
// a jit version of this instruction would be easy anyway
//jit.GetBlockCache()->InvalidateCodeRange(address, 0x20);
} }
void lbzux(UGeckoInstruction _inst) void lbzux(UGeckoInstruction _inst)

View File

@ -318,6 +318,28 @@ void mtspr(UGeckoInstruction _inst)
TU = m_GPR[_inst.RD]; TU = m_GPR[_inst.RD];
break; break;
case SPR_HID0: // HID0
{
UReg_HID0 old_hid0;
old_hid0.Hex = oldValue;
if (HID0.ICE != old_hid0.ICE)
{
NOTICE_LOG(POWERPC, "Instruction Cache Enable (HID0.ICE) = %d", (int)HID0.ICE);
}
if (HID0.ILOCK != old_hid0.ILOCK)
{
NOTICE_LOG(POWERPC, "Instruction Cache Lock (HID0.ILOCK) = %d", (int)HID0.ILOCK);
}
if (HID0.ICFI)
{
HID0.ICFI = 0;
NOTICE_LOG(POWERPC, "Flush Instruction Cache! ICE=%d", (int)HID0.ICE);
// this is rather slow
// most games do it only once during initialization
PowerPC::ppcState.iCache.Reset();
}
}
break;
case SPR_HID2: // HID2 case SPR_HID2: // HID2
{ {
UReg_HID2 old_hid2; UReg_HID2 old_hid2;

View File

@ -280,7 +280,7 @@ static GekkoOPTemplate table31[] =
{4, Interpreter::tw, {"tw", OPTYPE_SYSTEM, 0, 1}}, {4, Interpreter::tw, {"tw", OPTYPE_SYSTEM, 0, 1}},
{598, Interpreter::sync, {"sync", OPTYPE_SYSTEM, 0, 2}}, {598, Interpreter::sync, {"sync", OPTYPE_SYSTEM, 0, 2}},
{982, Interpreter::icbi, {"icbi", OPTYPE_SYSTEM, 0, 3}}, {982, Interpreter::icbi, {"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}},
// Unused instructions on GC // Unused instructions on GC
{310, Interpreter::eciwx, {"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, {310, Interpreter::eciwx, {"eciwx", OPTYPE_INTEGER, FL_RC_BIT}},

View File

@ -27,15 +27,10 @@
// * A flush simply does a conditional write to the appropriate CRx. // * A flush simply does a conditional write to the appropriate CRx.
// * If flag available, branch code can become absolutely trivial. // * If flag available, branch code can become absolutely trivial.
// Settings // Settings
// ---------- // ----------
#define JIT_OFF_OPTIONS // Compile with JIT off options #define JIT_OFF_OPTIONS // Compile with JIT off options
// Include // Include
// ---------- // ----------
#if JITTEST #if JITTEST
@ -55,9 +50,6 @@
#include <windows.h> #include <windows.h>
#endif #endif
// Declarations and definitions // Declarations and definitions
// ---------- // ----------
@ -308,6 +300,8 @@ public:
void lmw(UGeckoInstruction inst); void lmw(UGeckoInstruction inst);
void stmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst);
void icbi(UGeckoInstruction inst);
}; };
extern Jit64 jit; extern Jit64 jit;

View File

@ -86,12 +86,23 @@ void AsmRoutineManager::Generate()
dispatcherNoCheck = GetCodePtr(); dispatcherNoCheck = GetCodePtr();
MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); MOV(32, R(EAX), M(&PowerPC::ppcState.pc));
dispatcherPcInEAX = GetCodePtr(); dispatcherPcInEAX = GetCodePtr();
#ifdef JIT_UNLIMITED_ICACHE
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit.GetBlockCache()->GetICache()));
#else
MOV(64, R(RSI), Imm64((u64)jit.GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
#else
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EBX), Imm32((u32)Memory::base)); MOV(32, R(EBX), Imm32((u32)Memory::base));
MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
#else #else
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
#endif
#endif #endif
TEST(32, R(EAX), Imm32(0xFC)); TEST(32, R(EAX), Imm32(0xFC));
FixupBranch notfound = J_CC(CC_NZ); FixupBranch notfound = J_CC(CC_NZ);

View File

@ -509,3 +509,9 @@ void Jit64::stmw(UGeckoInstruction inst)
gpr.UnlockAllX(); gpr.UnlockAllX();
#endif #endif
} }
// JIT handler for the icbi instruction.
// Delegates the instruction itself to Default(inst) (presumably the generic
// fallback path - confirm), then writes a block exit that targets the instruction
// following icbi (compilerPC + 4) using exit slot 0.
void Jit64::icbi(UGeckoInstruction inst)
{
Default(inst);
WriteExit(js.compilerPC + 4, 0);
}

View File

@ -294,6 +294,8 @@ public:
void lmw(UGeckoInstruction inst); void lmw(UGeckoInstruction inst);
void stmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst);
void icbi(UGeckoInstruction inst);
}; };
extern Jit64 jit; extern Jit64 jit;
@ -303,4 +305,3 @@ void Jit(u32 em_address);
void ProfiledReJit(); void ProfiledReJit();
#endif #endif

View File

@ -88,6 +88,16 @@ void AsmRoutineManager::Generate()
dispatcherNoCheck = GetCodePtr(); dispatcherNoCheck = GetCodePtr();
MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); MOV(32, R(EAX), M(&PowerPC::ppcState.pc));
dispatcherPcInEAX = GetCodePtr(); dispatcherPcInEAX = GetCodePtr();
#ifdef JIT_UNLIMITED_ICACHE
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit.GetBlockCache()->GetICache()));
#else
MOV(64, R(RSI), Imm64((u64)jit.GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
#else
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EBX), Imm32((u32)Memory::base)); MOV(32, R(EBX), Imm32((u32)Memory::base));
@ -95,6 +105,8 @@ void AsmRoutineManager::Generate()
#else #else
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
#endif #endif
#endif
TEST(32, R(EAX), Imm32(0xFC)); TEST(32, R(EAX), Imm32(0xFC));
FixupBranch notfound = J_CC(CC_NZ); FixupBranch notfound = J_CC(CC_NZ);
BSWAP(32, EAX); BSWAP(32, EAX);

View File

@ -196,3 +196,9 @@ void Jit64::stmw(UGeckoInstruction inst)
addr = ibuild.EmitAdd(addr, ibuild.EmitIntConst(4)); addr = ibuild.EmitAdd(addr, ibuild.EmitIntConst(4));
} }
} }
// JIT IL handler for the icbi instruction.
// Delegates the instruction itself to Default(inst) (presumably the generic
// fallback path - confirm), then emits an unconditional branch to the instruction
// following icbi (compilerPC + 4).
void Jit64::icbi(UGeckoInstruction inst)
{
Default(inst);
ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}

View File

@ -60,7 +60,6 @@ using namespace Gen;
#define INVALID_EXIT 0xFFFFFFFF #define INVALID_EXIT 0xFFFFFFFF
bool JitBlock::ContainsAddress(u32 em_address) bool JitBlock::ContainsAddress(u32 em_address)
{ {
// WARNING - THIS DOES NOT WORK WITH INLINING ENABLED. // WARNING - THIS DOES NOT WORK WITH INLINING ENABLED.
@ -86,7 +85,21 @@ bool JitBlock::ContainsAddress(u32 em_address)
#endif #endif
blocks = new JitBlock[MAX_NUM_BLOCKS]; blocks = new JitBlock[MAX_NUM_BLOCKS];
blockCodePointers = new const u8*[MAX_NUM_BLOCKS]; blockCodePointers = new const u8*[MAX_NUM_BLOCKS];
#ifdef JIT_UNLIMITED_ICACHE
if (iCache == 0)
{
iCache = new u8[JIT_ICACHE_SIZE];
}
else
{
PanicAlert("JitBlockCache::Init() - iCache is already initialized");
}
if (iCache == 0)
{
PanicAlert("JitBlockCache::Init() - unable to allocate iCache");
}
memset(iCache, JIT_ICACHE_INVALID_BYTE, JIT_ICACHE_SIZE);
#endif
Clear(); Clear();
} }
@ -94,6 +107,11 @@ bool JitBlock::ContainsAddress(u32 em_address)
{ {
delete [] blocks; delete [] blocks;
delete [] blockCodePointers; delete [] blockCodePointers;
#ifdef JIT_UNLIMITED_ICACHE
if (iCache != 0)
delete [] iCache;
iCache = 0;
#endif
blocks = 0; blocks = 0;
blockCodePointers = 0; blockCodePointers = 0;
num_blocks = 0; num_blocks = 0;
@ -107,17 +125,17 @@ bool JitBlock::ContainsAddress(u32 em_address)
void JitBlockCache::Clear() void JitBlockCache::Clear()
{ {
Core::DisplayMessage("Cleared code cache.", 3000); Core::DisplayMessage("Cleared code cache.", 3000);
// Is destroying the blocks really necessary?
for (int i = 0; i < num_blocks; i++) for (int i = 0; i < num_blocks; i++)
{ {
DestroyBlock(i, false); DestroyBlock(i, false);
} }
links_to.clear(); links_to.clear();
block_map.clear();
num_blocks = 0; num_blocks = 0;
memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS); memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
} }
void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag) /*void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag)
{ {
for (int i = 0; i < num_blocks; i++) for (int i = 0; i < num_blocks; i++)
{ {
@ -126,7 +144,7 @@ bool JitBlock::ContainsAddress(u32 em_address)
DestroyBlock(i, false); DestroyBlock(i, false);
} }
} }
} }*/
void JitBlockCache::Reset() void JitBlockCache::Reset()
{ {
@ -161,7 +179,6 @@ bool JitBlock::ContainsAddress(u32 em_address)
JitBlock &b = blocks[num_blocks]; JitBlock &b = blocks[num_blocks];
b.invalid = false; b.invalid = false;
b.originalAddress = em_address; b.originalAddress = em_address;
b.originalFirstOpcode = Memory::ReadFast32(em_address);
b.exitAddress[0] = INVALID_EXIT; b.exitAddress[0] = INVALID_EXIT;
b.exitAddress[1] = INVALID_EXIT; b.exitAddress[1] = INVALID_EXIT;
b.exitPtrs[0] = 0; b.exitPtrs[0] = 0;
@ -177,7 +194,9 @@ bool JitBlock::ContainsAddress(u32 em_address)
{ {
blockCodePointers[block_num] = code_ptr; blockCodePointers[block_num] = code_ptr;
JitBlock &b = blocks[block_num]; JitBlock &b = blocks[block_num];
Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | block_num, blocks[block_num].originalAddress); b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress);
Memory::Write_Opcode_JIT(b.originalAddress, (JIT_OPCODE << 26) | block_num);
block_map[std::make_pair(b.originalAddress + b.originalSize - 1, b.originalAddress)] = block_num;
if (block_link) if (block_link)
{ {
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
@ -204,51 +223,52 @@ bool JitBlock::ContainsAddress(u32 em_address)
return blockCodePointers; return blockCodePointers;
} }
#ifdef JIT_UNLIMITED_ICACHE
// Returns the base pointer of the JIT instruction cache buffer (the iCache member).
// Only available when JIT_UNLIMITED_ICACHE is defined. The pointer is handed out
// as-is, so callers index into it with addresses masked by JIT_ICACHE_MASK.
u8 *JitBlockCache::GetICache()
{
return iCache;
}
#endif
int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr) int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr)
{ {
if (!blocks) if (!blocks)
return -1; return -1;
u32 code = Memory::ReadFast32(addr); #ifdef JIT_UNLIMITED_ICACHE
if ((code >> 26) == JIT_OPCODE) u32 inst = *(u32*)(iCache + (addr & JIT_ICACHE_MASK));
{ inst = Common::swap32(inst);
// Jitted code. #else
unsigned int block = code & 0x03FFFFFF; u32 inst = Memory::ReadFast32(addr);
if (block >= (unsigned int)num_blocks) { #endif
return -1; if (inst & 0xfc000000) // definitely not a JIT block
}
if (blocks[block].originalAddress != addr)
{
//_assert_msg_(DYNA_REC, 0, "GetBlockFromAddress %08x - No match - This is BAD", addr);
return -1;
}
return block;
}
else
{
return -1; return -1;
} if (inst >= num_blocks)
return -1;
if (blocks[inst].originalAddress != addr)
return -1;
return inst;
} }
void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers) void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers)
{
for (int i = 0; i < num_blocks; i++)
if (blocks[i].ContainsAddress(em_address))
block_numbers->push_back(i);
}
u32 JitBlockCache::GetOriginalCode(u32 address)
{ {
int num = GetBlockNumberFromStartAddress(address); for (int i = 0; i < num_blocks; i++)
if (num == -1) if (blocks[i].ContainsAddress(em_address))
return Memory::ReadUnchecked_U32(address); block_numbers->push_back(i);
else }
return blocks[num].originalFirstOpcode;
// Looks up the opcode that originally sat at the start of block number block_num.
// A block_num that is not below num_blocks is returned unchanged.
u32 JitBlockCache::GetOriginalFirstOp(u32 block_num)
{
	if (block_num < num_blocks)
		return blocks[block_num].originalFirstOpcode;

	//PanicAlert("JitBlockCache::GetOriginalFirstOp - block_num = %u is out of range", block_num);
	return block_num;
}
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber) CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber)
{ {
return (CompiledCode)blockCodePointers[blockNumber]; return (CompiledCode)blocks[blockNumber].normalEntry;
} }
//Block linker //Block linker
@ -301,52 +321,64 @@ void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector<int>
void JitBlockCache::DestroyBlock(int blocknum, bool invalidate) void JitBlockCache::DestroyBlock(int blocknum, bool invalidate)
{ {
u32 codebytes = (JIT_OPCODE << 26) | blocknum; //generate from i if (blocknum < 0 || blocknum >= num_blocks)
{
PanicAlert("DestroyBlock: Invalid block number %d", blocknum);
return;
}
JitBlock &b = blocks[blocknum]; JitBlock &b = blocks[blocknum];
b.invalid = 1; if (b.invalid)
if (codebytes == Memory::ReadFast32(b.originalAddress))
{ {
//nobody has changed it, good if (invalidate)
PanicAlert("Invalidating invalid block %d", blocknum);
return;
}
b.invalid = true;
#ifdef JIT_UNLIMITED_ICACHE
Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode);
#else
if (Memory::ReadFast32(b.originalAddress) == blocknum)
Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress); Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress);
} #endif
else if (!invalidate)
{
//PanicAlert("Detected code overwrite");
//else, we may be in trouble, since we apparently know of this block but it's been
//overwritten. We should have thrown it out before, on instruction cache invalidate or something.
//Not ne cessarily bad though , if a game has simply thrown away a lot of code and is now using the space
//for something else, then it's fine.
DEBUG_LOG(MASTER_LOG, "WARNING - ClearCache detected code overwrite @ %08x", blocks[blocknum].originalAddress);
}
// We don't unlink blocks, we just send anyone who tries to run them back to the dispatcher. // We don't unlink blocks, we just send anyone who tries to run them back to the dispatcher.
// Not entirely ideal, but .. pretty good. // Not entirely ideal, but .. pretty good.
// TODO - make sure that the below stuff really is safe.
// Spurious entrances from previously linked blocks can only come through checkedEntry // Spurious entrances from previously linked blocks can only come through checkedEntry
XEmitter emit((u8 *)b.checkedEntry); XEmitter emit((u8 *)b.checkedEntry);
emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.MOV(32, M(&PC), Imm32(b.originalAddress));
emit.JMP(asm_routines.dispatcher, true); emit.JMP(asm_routines.dispatcher, true);
// this is not needed really
/*
emit.SetCodePtr((u8 *)blockCodePointers[blocknum]); emit.SetCodePtr((u8 *)blockCodePointers[blocknum]);
emit.MOV(32, M(&PC), Imm32(b.originalAddress)); emit.MOV(32, M(&PC), Imm32(b.originalAddress));
emit.JMP(asm_routines.dispatcher, true); emit.JMP(asm_routines.dispatcher, true);
*/
} }
void JitBlockCache::InvalidateCodeRange(u32 address, u32 length) void JitBlockCache::InvalidateICache(u32 address)
{ {
if (!jit.jo.enableBlocklink) address &= ~0x1f;
return; // destroy JIT blocks
return; // !! this works correctly under assumption that any two overlapping blocks end at the same address
//This is slow but should be safe (zelda needs it for block linking) std::map<pair<u32,u32>, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it;
for (int i = 0; i < num_blocks; i++) while (it2 != block_map.end() && it2->first.second < address + 0x20)
{ {
if (RangeIntersect(blocks[i].originalAddress, blocks[i].originalAddress + blocks[i].originalSize, DestroyBlock(it2->second, true);
address, address + length)) it2++;
{
DestroyBlock(i, true);
}
} }
if (it1 != it2)
{
block_map.erase(it1, it2);
}
#ifdef JIT_UNLIMITED_ICACHE
// invalidate iCache
if ((address & ~JIT_ICACHE_MASK) != 0x80000000 && (address & ~JIT_ICACHE_MASK) != 0x00000000)
{
return;
}
u32 cacheaddr = address & JIT_ICACHE_MASK;
memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32);
#endif
} }

View File

@ -28,6 +28,17 @@
#include <windows.h> #include <windows.h>
#endif #endif
// emulate CPU with unlimited instruction cache
// the only way to invalidate a region is the "icbi" instruction
#define JIT_UNLIMITED_ICACHE
#define JIT_ICACHE_SIZE 0x2000000
#define JIT_ICACHE_MASK 0x1ffffff
// this corresponds to opcode 5 which is invalid in PowerPC
#define JIT_ICACHE_INVALID_BYTE 0x14
#define JIT_ICACHE_INVALID_WORD 0x14141414
enum BlockFlag enum BlockFlag
{ {
BLOCK_USE_GQR0 = 0x1, BLOCK_USE_GQR1 = 0x2, BLOCK_USE_GQR2 = 0x4, BLOCK_USE_GQR3 = 0x8, BLOCK_USE_GQR0 = 0x1, BLOCK_USE_GQR1 = 0x2, BLOCK_USE_GQR2 = 0x4, BLOCK_USE_GQR3 = 0x8,
@ -71,6 +82,10 @@ class JitBlockCache
JitBlock *blocks; JitBlock *blocks;
int num_blocks; int num_blocks;
std::multimap<u32, int> links_to; std::multimap<u32, int> links_to;
std::map<std::pair<u32,u32>, u32> block_map; // (end_addr, start_addr) -> number
#ifdef JIT_UNLIMITED_ICACHE
u8 *iCache;
#endif
int MAX_NUM_BLOCKS; int MAX_NUM_BLOCKS;
bool RangeIntersect(int s1, int e1, int s2, int e2) const; bool RangeIntersect(int s1, int e1, int s2, int e2) const;
@ -94,6 +109,9 @@ public:
JitBlock *GetBlock(int block_num); JitBlock *GetBlock(int block_num);
int GetNumBlocks() const; int GetNumBlocks() const;
const u8 **GetCodePointers(); const u8 **GetCodePointers();
#ifdef JIT_UNLIMITED_ICACHE
u8 *GetICache();
#endif
// Fast way to get a block. Only works on the first ppc instruction of a block. // Fast way to get a block. Only works on the first ppc instruction of a block.
int GetBlockNumberFromStartAddress(u32 em_address); int GetBlockNumberFromStartAddress(u32 em_address);
@ -104,15 +122,15 @@ public:
// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime. // This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers); void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers);
u32 GetOriginalCode(u32 address); u32 GetOriginalFirstOp(u32 block_num);
CompiledCode GetCompiledCodeFromBlock(int blockNumber); CompiledCode GetCompiledCodeFromBlock(int blockNumber);
// DOES NOT WORK CORRECTLY WITH INLINING // DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateCodeRange(u32 em_address, u32 length); void InvalidateICache(u32 em_address);
void DestroyBlock(int blocknum, bool invalidate); void DestroyBlock(int blocknum, bool invalidate);
// Not currently used // Not currently used
void DestroyBlocksWithFlag(BlockFlag death_flag); //void DestroyBlocksWithFlag(BlockFlag death_flag);
}; };
#endif #endif

View File

@ -319,7 +319,7 @@ static GekkoOPTemplate table31[] =
{4, &Jit64::Default}, //"tw", OPTYPE_SYSTEM, 0, 1}}, {4, &Jit64::Default}, //"tw", OPTYPE_SYSTEM, 0, 1}},
{598, &Jit64::DoNothing}, //"sync", OPTYPE_SYSTEM, 0, 2}}, {598, &Jit64::DoNothing}, //"sync", OPTYPE_SYSTEM, 0, 2}},
{982, &Jit64::Default}, //"icbi", OPTYPE_SYSTEM, 0, 3}}, {982, &Jit64::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}},
// Unused instructions on GC // Unused instructions on GC
{310, &Jit64::Default}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, {310, &Jit64::Default}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}},

View File

@ -287,8 +287,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer) bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, BlockRegStats *fpa, CodeBuffer *buffer)
{ {
memset(st, 0, sizeof(st)); memset(st, 0, sizeof(st));
UGeckoInstruction previnst = Memory::Read_Opcode_JIT_LC(address - 4);
UGeckoInstruction previnst = Memory::Read_Instruction(address - 4);
if (previnst.hex == 0x4e800020) if (previnst.hex == 0x4e800020)
st->isFirstBlockOfFunction = true; st->isFirstBlockOfFunction = true;
@ -309,10 +308,9 @@ bool Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Blo
{ {
memset(&code[i], 0, sizeof(CodeOp)); memset(&code[i], 0, sizeof(CodeOp));
code[i].address = address; code[i].address = address;
UGeckoInstruction inst = Memory::Read_Instruction(code[i].address);
UGeckoInstruction untouched_op = Memory::ReadUnchecked_U32(code[i].address); UGeckoInstruction inst = Memory::Read_Opcode_JIT(code[i].address);
if (untouched_op.OPCD == 1) // Do handle HLE instructions.
inst = untouched_op;
_assert_msg_(POWERPC, inst.hex != 0, "Zero Op - Error flattening %08x op %08x", address + i*4, inst.hex); _assert_msg_(POWERPC, inst.hex != 0, "Zero Op - Error flattening %08x op %08x", address + i*4, inst.hex);
code[i].inst = inst; code[i].inst = inst;
code[i].branchTo = -1; code[i].branchTo = -1;

View File

@ -0,0 +1,135 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "PPCCache.h"
#include "../HW/Memmap.h"
#include "PowerPC.h"
namespace PowerPC
{
// Per-way update tables for a set's pseudo-LRU bits, indexed by the way (0-7) that
// was just accessed. ReadInstruction applies them as
//   plru = (plru & ~plru_mask[way]) | plru_value[way]
// i.e. plru_mask selects which PLRU bits the access rewrites and plru_value gives
// their new contents.
u32 plru_mask[8] = {11,11,19,19,37,37,69,69};
u32 plru_value[8] = {11,3,17,1,36,4,64,0};
// Precomputes the two way-selection tables used when a cache line has to be loaded.
// The cache state itself (valid/plru/tags/data) is not touched here.
InstructionCache::InstructionCache()
{
	// way_from_valid[mask]: index of the lowest clear bit of a valid-mask,
	// i.e. the first way of a set that is still free.
	for (u32 mask = 0; mask < 0xff; mask++)
	{
		u32 way = 0;
		while (mask & (1 << way))
			way++;
		way_from_valid[mask] = way;
	}

	// way_from_plru[bits]: way selected by walking the 7 pseudo-LRU bits as a
	// three-level binary decision tree.
	for (u32 bits = 0; bits < 128; bits++)
	{
		const bool b0 = (bits & (1 << 0)) != 0;
		const bool b1 = (bits & (1 << 1)) != 0;
		const bool b2 = (bits & (1 << 2)) != 0;
		const bool b3 = (bits & (1 << 3)) != 0;
		const bool b4 = (bits & (1 << 4)) != 0;
		const bool b5 = (bits & (1 << 5)) != 0;
		const bool b6 = (bits & (1 << 6)) != 0;

		u32 way;
		if (b0)
			way = b2 ? (b6 ? 7 : 6) : (b5 ? 5 : 4);
		else
			way = b1 ? (b4 ? 3 : 2) : (b3 ? 1 : 0);
		way_from_plru[bits] = way;
	}
}
// Empties the cache: clears every set's valid mask and pseudo-LRU bits and, when
// FAST_ICACHE is defined, fills lookup_table with 0xff ("not cached").
// The data and tags arrays are left as they are.
void InstructionCache::Reset()
{
#ifdef FAST_ICACHE
	memset(lookup_table, 0xff, sizeof(lookup_table));
#endif
	memset(plru, 0, sizeof(plru));
	memset(valid, 0, sizeof(valid));
}
// Invalidates the cache set that addr maps to. Does nothing while the instruction
// cache is disabled (HID0.ICE clear).
// Note that every way of the set is dropped, not just the line holding addr.
void InstructionCache::Invalidate(u32 addr)
{
	if (HID0.ICE)
	{
		const u32 setIndex = (addr >> 5) & 0x7f;
#ifdef FAST_ICACHE
		// Remove the fast-lookup entry of each way that currently holds a line.
		for (int way = 0; way < 8; way++)
		{
			if ((valid[setIndex] & (1 << way)) == 0)
				continue;
			const u32 lineIndex = ((tags[setIndex][way] << 7) | setIndex) & 0xfffff;
			lookup_table[lineIndex] = 0xff;
		}
#endif
		valid[setIndex] = 0;
	}
}
// Fetches the instruction word at addr through the emulated instruction cache.
//
// - Cache disabled (HID0.ICE clear): reads directly from memory.
// - Hit: returns the word from the cached line.
// - Miss with the cache locked (HID0.ILOCK set): reads directly from memory and
//   leaves the cache unchanged.
// - Miss otherwise: picks a way (the first free one, else the pseudo-LRU victim),
//   copies the 32-byte line in from memory, then returns the word.
//
// The set's pseudo-LRU bits are updated on every hit and on every completed load.
// Returns the instruction after byte-swapping the stored word.
u32 InstructionCache::ReadInstruction(u32 addr)
{
	if (!HID0.ICE) // instruction cache is disabled
		return Memory::ReadUnchecked_U32(addr);
	u32 set = (addr >> 5) & 0x7f;
	u32 tag = addr >> 12;
#ifdef FAST_ICACHE
	// Direct table from line index to way; 0xff means "not cached".
	u32 t = lookup_table[(addr>>5) & 0xfffff];
#else
	u32 t = 0xff;
	for (u32 i = 0; i < 8; i++)
		if (tags[set][i] == tag && (valid[set] & (1<<i)))
		{
			t = i;
			break;
		}
#endif
	if (t == 0xff) // load to the cache
	{
		if (HID0.ILOCK) // instruction cache is locked
			return Memory::ReadUnchecked_U32(addr);
		// select a way
		if (valid[set] != 0xff)
			t = way_from_valid[valid[set]];
		else
			t = way_from_plru[plru[set]];
		// load
		u8 *p = Memory::GetPointer(addr & ~0x1f);
		// NOTE(review): assumes GetPointer yields a null pointer for addresses it
		// cannot map - confirm. Copying from a null source would be undefined, so
		// fall back to the uncached read path before any cache state is modified.
		if (!p)
			return Memory::ReadUnchecked_U32(addr);
		memcpy(data[set][t], p, 32);
#ifdef FAST_ICACHE
		// Drop the table entry of the line being evicted, then register the new one.
		if (valid[set] & (1<<t))
			lookup_table[((tags[set][t] << 7) | set) & 0xfffff] = 0xff;
		lookup_table[(addr>>5) & 0xfffff] = t;
#endif
		tags[set][t] = tag;
		valid[set] |= 1<<t;
	}
	// update plru
	plru[set] = (plru[set] & ~plru_mask[t]) | plru_value[t];
	// Lines are stored as raw memory copies; swap the selected word on the way out.
	return Common::swap32(data[set][t][(addr>>2)&7]);
}
}

View File

@ -0,0 +1,55 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _PPCCACHE_H
#define _PPCCACHE_H
#include "Common.h"
#define FAST_ICACHE
namespace PowerPC
{
const u32 ICACHE_SETS = 128;
const u32 ICACHE_WAYS = 8;
// size of an instruction cache block in words
const u32 ICACHE_BLOCK_SIZE = 8;
// State of the emulated instruction cache: ICACHE_SETS sets of ICACHE_WAYS ways,
// each way holding one block of ICACHE_BLOCK_SIZE 32-bit words.
struct InstructionCache
{
// Cached line contents, stored as raw copies of emulated memory.
u32 data[ICACHE_SETS][ICACHE_WAYS][ICACHE_BLOCK_SIZE];
// Tag (addr >> 12) of the line held in each way; meaningful only if the way is valid.
u32 tags[ICACHE_SETS][ICACHE_WAYS];
// Pseudo-LRU bits per set (7 bits used).
u32 plru[ICACHE_SETS];
// Per-set bitmask of valid ways (bit i set means way i holds a line).
u32 valid[ICACHE_SETS];
// Maps a valid-mask (0..254) to the index of its lowest clear bit (first free way).
u32 way_from_valid[255];
// Maps a set's PLRU bits (0..127) to the way to replace.
u32 way_from_plru[128];
#ifdef FAST_ICACHE
// Maps line index ((addr >> 5) & 0xfffff) to the way caching it, or 0xff if none.
u8 lookup_table[1<<20];
#endif
// Builds the way_from_valid / way_from_plru tables.
InstructionCache();
// Marks every line invalid and clears the PLRU state.
void Reset();
// Returns the instruction at addr, loading its line into the cache if needed.
u32 ReadInstruction(u32 addr);
// Invalidates the set that addr maps to.
void Invalidate(u32 addr);
};
}
#endif

View File

@ -127,6 +127,8 @@ void Init()
// ... but start as interpreter by default. // ... but start as interpreter by default.
mode = MODE_INTERPRETER; mode = MODE_INTERPRETER;
state = CPU_STEPPING; state = CPU_STEPPING;
ppcState.iCache.Reset();
} }
void Shutdown() void Shutdown()

View File

@ -22,6 +22,7 @@
#include "Gekko.h" #include "Gekko.h"
#include "BreakPoints.h" #include "BreakPoints.h"
#include "../Debugger/PPCDebugInterface.h" #include "../Debugger/PPCDebugInterface.h"
#include "PPCCache.h"
class PointerWrap; class PointerWrap;
@ -64,6 +65,9 @@ struct GC_ALIGNED64(PowerPCState)
// special purpose registers - controlls quantizers, DMA, and lots of other misc extensions. // special purpose registers - controlls quantizers, DMA, and lots of other misc extensions.
// also for power management, but we don't care about that. // also for power management, but we don't care about that.
u32 spr[1024]; u32 spr[1024];
InstructionCache iCache;
// JIT-mode instruction cache. Managed by JitCache
}; };
enum CPUState enum CPUState
@ -101,6 +105,7 @@ void OnIdle(u32 _uThreadAddr);
void OnIdleIL(); void OnIdleIL();
// Easy register access macros. // Easy register access macros.
#define HID0 ((UReg_HID0&)PowerPC::ppcState.spr[SPR_HID0])
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2]) #define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU]) #define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL]) #define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])

View File

@ -72,6 +72,7 @@ files = ["ActionReplay.cpp",
"PowerPC/PowerPC.cpp", "PowerPC/PowerPC.cpp",
"PowerPC/PPCAnalyst.cpp", "PowerPC/PPCAnalyst.cpp",
"PowerPC/PPCTables.cpp", "PowerPC/PPCTables.cpp",
"PowerPC/PPCCache.cpp",
"PowerPC/Profiler.cpp", "PowerPC/Profiler.cpp",
"PowerPC/SignatureDB.cpp", "PowerPC/SignatureDB.cpp",
"PowerPC/PPCSymbolDB.cpp", "PowerPC/PPCSymbolDB.cpp",

View File

@ -24,7 +24,11 @@
#include "CoreTiming.h" #include "CoreTiming.h"
#include "HW/HW.h" #include "HW/HW.h"
#include "PowerPC/PowerPC.h" #include "PowerPC/PowerPC.h"
#ifdef JITTEST
#include "PowerPC/Jit64IL/Jit.h"
#else
#include "PowerPC/Jit64/Jit.h" #include "PowerPC/Jit64/Jit.h"
#endif
#include "PluginManager.h" #include "PluginManager.h"
@ -91,6 +95,9 @@ void DoState(PointerWrap &p)
PowerPC::DoState(p); PowerPC::DoState(p);
HW::DoState(p); HW::DoState(p);
CoreTiming::DoState(p); CoreTiming::DoState(p);
#ifdef JIT_UNLIMITED_ICACHE
p.DoVoid(jit.GetBlockCache()->GetICache(), JIT_ICACHE_SIZE);
#endif
} }
void LoadBufferStateCallback(u64 userdata, int cyclesLate) void LoadBufferStateCallback(u64 userdata, int cyclesLate)