Consolidate some compare instructions in JIT, preparations for separate CR flag storage, misc other cleanup in cpu core.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1547 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-12-15 19:22:34 +00:00
parent 4b5cfed314
commit 866d4e6bc8
17 changed files with 240 additions and 228 deletions

View File

@ -672,7 +672,8 @@ bool Init()
else
InitHWMemFuncs();
LOG(MEMMAP, "Memory system initialized. RAM at %p (0x80000000 @ %p)", base, base + 0x80000000);
LOG(MEMMAP, "Memory system initialized. RAM at %p (mirrors at 0 @ %p, 0x80000000 @ %p , 0xC0000000 @ %p)",
m_pRAM, m_pPhysicalRAM, m_pVirtualCachedRAM, m_pVirtualUncachedRAM);
m_IsInitialized = true;
return true;
}

View File

@ -117,7 +117,7 @@ void andis_rc(UGeckoInstruction _inst)
void cmpi(UGeckoInstruction _inst)
{
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA]-_inst.SIMM_16);
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA] - _inst.SIMM_16);
}
void cmpli(UGeckoInstruction _inst)
@ -128,7 +128,7 @@ void cmpli(UGeckoInstruction _inst)
if (a < b) f = 0x8;
else if (a > b) f = 0x4;
else f = 0x2; //equals
if (XER.SO) f = 0x1;
if (GetXER_SO()) f |= 0x1;
SetCRField(_inst.CRFD, f);
}
@ -151,13 +151,12 @@ void subfic(UGeckoInstruction _inst)
{
/* u32 rra = ~m_GPR[_inst.RA];
s32 immediate = (s16)_inst.SIMM_16 + 1;
// #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA)
if ((rra + immediate) < rra)
XER.CA = 1;
SetCarry(1);
else
XER.CA = 0;
SetCarry(0);
m_GPR[_inst.RD] = rra - immediate;
*/
@ -227,11 +226,10 @@ void cmp(UGeckoInstruction _inst)
s32 a = (s32)m_GPR[_inst.RA];
s32 b = (s32)m_GPR[_inst.RB];
int fTemp = 0x8; // a < b
// if (a < b) fTemp = 0x8; else
if (a > b) fTemp = 0x4;
// if (a < b) fTemp = 0x8; else
if (a > b) fTemp = 0x4;
else if (a == b) fTemp = 0x2;
if (XER.SO) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
if (GetXER_SO()) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
SetCRField(_inst.CRFD, fTemp);
}
@ -241,10 +239,10 @@ void cmpl(UGeckoInstruction _inst)
u32 b = m_GPR[_inst.RB];
u32 fTemp = 0x8; // a < b
// if (a < b) fTemp = 0x8;else
if (a > b) fTemp = 0x4;
// if (a < b) fTemp = 0x8;else
if (a > b) fTemp = 0x4;
else if (a == b) fTemp = 0x2;
if (XER.SO) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
if (GetXER_SO()) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
SetCRField(_inst.CRFD, fTemp);
}

View File

@ -28,6 +28,11 @@
namespace Interpreter
{
// TODO: These should really be in the save state, although it's unlikely to matter much.
// They are for lwarx and its friend stwcxd.
static bool g_bReserve = false;
static u32 g_reserveAddr;
u32 Helper_Get_EA(const UGeckoInstruction _inst)
{
return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
@ -581,37 +586,32 @@ void stwbrx(UGeckoInstruction _inst)
// The following two instructions are for SMP communications. On a single
// CPU, they cannot fail unless an interrupt happens in between, which usually
// won't happen with the JIT.
bool g_bReserve = false;
u32 g_reserveAddr;
// CPU, they cannot fail unless an interrupt happens in between.
void lwarx(UGeckoInstruction _inst)
{
u32 uAddress = Helper_Get_EA_X(_inst);
u32 uAddress = Helper_Get_EA_X(_inst);
m_GPR[_inst.RD] = Memory::Read_U32(uAddress);
g_bReserve = true;
g_reserveAddr = uAddress;
g_bReserve = true;
g_reserveAddr = uAddress;
}
void stwcxd(UGeckoInstruction _inst)
{
// Stores Word Conditional indeXed
u32 uAddress;
if(g_bReserve) {
// Stores Word Conditional indeXed
u32 uAddress;
if (g_bReserve) {
uAddress = Helper_Get_EA_X(_inst);
if(uAddress == g_reserveAddr) {
if (uAddress == g_reserveAddr) {
Memory::Write_U32(m_GPR[_inst.RS], uAddress);
g_bReserve = false;
SetCRField(0, 2 | XER.SO);
return;
}
}
g_bReserve = false;
SetCRField(0, 2 | GetXER_SO());
return;
}
}
SetCRField(0, XER.SO);
SetCRField(0, GetXER_SO());
}
void stwux(UGeckoInstruction _inst)

View File

@ -229,7 +229,6 @@ void ps_cmpu1(UGeckoInstruction _inst)
if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4;
else compareResult = 2;
SetCRField(_inst.CRFD, compareResult);
}

View File

@ -226,8 +226,9 @@ void mtfsfx(UGeckoInstruction _inst)
void mcrxr(UGeckoInstruction _inst)
{
SetCRField(_inst.CRFD, XER.Hex >> 28);
XER.Hex &= ~0xF0000000; // clear 0-3
// USES_XER
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
}
void mfcr(UGeckoInstruction _inst)

View File

@ -385,6 +385,7 @@ namespace Jit64
js.instructionNumber = i;
if (i == (int)size - 1) {
js.isLastInstruction = true;
js.next_inst = 0;
if (Profiler::g_ProfileBlocks) {
// CAUTION!!! push on stack regs you use, do your stuff, then pop
PROFILER_VPUSH;
@ -394,6 +395,9 @@ namespace Jit64
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
PROFILER_VPOP;
}
} else {
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
}
// const GekkoOpInfo *info = GetOpInfo();

View File

@ -49,6 +49,7 @@ namespace Jit64
{
u32 compilerPC;
u32 blockStart;
UGeckoInstruction next_inst; // for easy peephole opt.
int blockSize;
int instructionNumber;
int downcountAmount;
@ -142,10 +143,8 @@ namespace Jit64
void fcmpx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);
void cmpli(UGeckoInstruction inst);
void cmpi(UGeckoInstruction inst);
void cmpl(UGeckoInstruction inst);
void cmp(UGeckoInstruction inst);
void cmpXi(UGeckoInstruction inst);
void cmpX(UGeckoInstruction inst);
void cntlzwx(UGeckoInstruction inst);

View File

@ -324,18 +324,19 @@ void GenFifoXmm64Write()
void GenerateCommon()
{
// USES_CR
computeRc = AlignCode16();
AND(32, M(&CR), Imm32(0x0FFFFFFF));
AND(32, M(&PowerPC::ppcState.cr), Imm32(0x0FFFFFFF));
CMP(32, R(EAX), Imm8(0));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x20000000)); // _x86Reg == 0
RET();
SetJumpTarget(pGreater);
OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x40000000)); // _x86Reg > 0
RET();
SetJumpTarget(pLesser);
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000)); // _x86Reg < 0
RET();
fifoDirectWrite8 = AlignCode4();

View File

@ -108,6 +108,7 @@ namespace Jit64
// variants of this instruction.
void bcx(UGeckoInstruction inst)
{
// USES_CR
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
gpr.Flush(FLUSH_ALL);
@ -124,7 +125,7 @@ namespace Jit64
if ((inst.BO & 16) == 0) // Test a CR bit
{
TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI));
TEST(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000 >> inst.BI));
if (inst.BO & 8) // Conditional branch
branch = CC_NZ;
else
@ -181,14 +182,14 @@ namespace Jit64
{
skip = J_CC(branch);
}
u32 destination;
if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
if(inst.AA)
destination = SignExt16(inst.BD << 2);
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
WriteExit(destination, 0);
u32 destination;
if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
if(inst.AA)
destination = SignExt16(inst.BD << 2);
else
destination = js.compilerPC + SignExt16(inst.BD << 2);
WriteExit(destination, 0);
if (inst.BO != 20)
{
SetJumpTarget(skip);

View File

@ -205,8 +205,8 @@ namespace Jit64
{
fpr.LoadToX64(a, true);
}
AND(32, M(&CR), Imm32(~(0xF0000000 >> shift)));
// USES_CR
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> shift)));
if (ordered)
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
else
@ -226,7 +226,7 @@ namespace Jit64
SetJumpTarget(continue1);
SetJumpTarget(continue2);
SHR(32, R(EAX), Imm8(shift));
OR(32, M(&CR), R(EAX));
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
fpr.UnlockAll();
}

View File

@ -24,6 +24,7 @@
#include "JitCache.h"
#include "JitRegCache.h"
#include "JitAsm.h"
#include "Jit_Util.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
@ -32,10 +33,11 @@ namespace Jit64
{
// Assumes that the flags were just set through an addition.
void GenerateCarry(X64Reg temp_reg) {
// USES_XER
SETcc(CC_C, R(temp_reg));
AND(32, M(&XER), Imm32(~(1 << 29)));
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
SHL(32, R(temp_reg), Imm8(29));
OR(32, M(&XER), R(temp_reg));
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg));
}
typedef u32 (*Operation)(u32 a, u32 b);
@ -133,26 +135,49 @@ namespace Jit64
}
}
/*
if (js.next_inst.OPCD == 16) { // bcx
if (!js.next_inst.LK && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG))
{
// it's clear there's plenty of opportunity.
//PanicAlert("merge");
}
}
*/
// unsigned
void cmpli(UGeckoInstruction inst)
void cmpXi(UGeckoInstruction inst)
{
// Should check if the next instruction is a branch - if it is, merge the two. This can save
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
// towards branches.
// USES_CR
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
// Should check if the next instruction is a branch - if it is, merge the two. This can save
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
// towards branches.
INSTRUCTION_START;
int a = inst.RA;
u32 uimm = inst.UIMM;
int crf = inst.CRFD;
int shift = crf * 4;
Gen::CCFlags less_than, greater_than;
OpArg comparand;
if (inst.OPCD == 10) {
less_than = CC_B;
greater_than = CC_A;
comparand = Imm32(inst.UIMM);
} else {
less_than = CC_L;
greater_than = CC_G;
comparand = Imm32((s32)(s16)inst.UIMM);
}
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), Imm32(uimm));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
FixupBranch continue1 = J();
@ -165,44 +190,17 @@ namespace Jit64
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
// Since it's the last block, some liberties can be taken.
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
}
// signed
void cmpi(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
INSTRUCTION_START;
int a = inst.RA;
s32 simm = (s32)(s16)inst.UIMM;
int crf = inst.CRFD;
int shift = crf * 4;
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), Imm32(simm));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
// _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
}
// signed
void cmp(UGeckoInstruction inst)
void cmpX(UGeckoInstruction inst)
{
// USES_CR
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
@ -212,12 +210,21 @@ namespace Jit64
int b = inst.RB;
int crf = inst.CRFD;
int shift = crf * 4;
Gen::CCFlags less_than, greater_than;
Gen::OpArg comparand = gpr.R(b);
if (inst.SUBOP10 == 32) {
less_than = CC_B;
greater_than = CC_A;
} else {
less_than = CC_L;
greater_than = CC_G;
}
gpr.Lock(a, b);
gpr.LoadToX64(a, true, false);
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), gpr.R(b));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
// _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J();
@ -230,41 +237,7 @@ namespace Jit64
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
gpr.UnlockAll();
}
// unsigned
void cmpl(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
INSTRUCTION_START;
int a = inst.RA;
int b = inst.RB;
int crf = inst.CRFD;
int shift = crf * 4;
gpr.Lock(a, b);
gpr.LoadToX64(a, true, false);
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), gpr.R(b));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
// _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
gpr.UnlockAll();
}
@ -652,6 +625,7 @@ namespace Jit64
// This can be optimized
void addex(UGeckoInstruction inst)
{
// USES_XER
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
@ -664,7 +638,7 @@ namespace Jit64
gpr.LoadToX64(d, false);
else
gpr.LoadToX64(d, true);
MOV(32, R(EAX), M(&XER));
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, R(EAX), gpr.R(a));
ADC(32, R(EAX), gpr.R(b));
@ -895,6 +869,7 @@ namespace Jit64
void srawx(UGeckoInstruction inst)
{
// USES_XER
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
@ -919,17 +894,17 @@ namespace Jit64
CMP(32, R(EAX), Imm32(-1));
SETcc(CC_L, R(EAX));
SAR(32, gpr.R(a), R(ECX));
AND(32, M(&XER), Imm32(~(1 << 29)));
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
SHL(32, R(EAX), Imm8(29));
OR(32, M(&XER), R(EAX));
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
FixupBranch end = J();
SetJumpTarget(topBitSet);
MOV(32, R(EAX), gpr.R(s));
SAR(32, R(EAX), Imm8(31));
MOV(32, gpr.R(a), R(EAX));
AND(32, M(&XER), Imm32(~(1 << 29)));
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
AND(32, R(EAX), Imm32(1<<29));
OR(32, M(&XER), R(EAX));
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
SetJumpTarget(end);
gpr.UnlockAll();
gpr.UnlockAllX();
@ -961,11 +936,11 @@ namespace Jit64
FixupBranch nocarry1 = J_CC(CC_GE);
TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out?
FixupBranch nocarry2 = J_CC(CC_Z);
OR(32, M(&XER), Imm32(XER_CA_MASK)); //XER.CA = 1
JitSetCA();
FixupBranch carry = J(false);
SetJumpTarget(nocarry1);
SetJumpTarget(nocarry2);
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
JitClearCA();
SetJumpTarget(carry);
gpr.UnlockAll();
}
@ -973,7 +948,7 @@ namespace Jit64
{
Default(inst); return;
gpr.Lock(a, s);
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
JitClearCA();
gpr.LoadToX64(a, a == s, true);
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));

View File

@ -37,6 +37,16 @@
namespace Jit64
{
// JIT helper: issues an AND on the XER slot of ppcState.spr with ~XER_CA_MASK,
// so the carry (CA) bit is cleared while every other XER bit is left as it was.
// Takes no arguments and returns nothing; its only effect is the AND() call below.
void JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
// JIT helper: issues an OR on the XER slot of ppcState.spr with XER_CA_MASK,
// so the carry (CA) bit is set while every other XER bit is left as it was.
// Takes no arguments and returns nothing; its only effect is the OR() call below.
void JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86

View File

@ -33,4 +33,7 @@ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void ForceSinglePrecisionS(X64Reg xmm);
void ForceSinglePrecisionP(X64Reg xmm);
void JitClearCA();
void JitSetCA();
} // namespace

View File

@ -285,19 +285,10 @@ void FixUpInternalBranches(CodeOp *code, int begin, int end)
}
}
void ShuffleUp(CodeOp *code, int first, int last)
{
CodeOp temp = code[first];
for (int i = first; i < last; i++)
code[i] = code[i + 1];
code[last] = temp;
}
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{
// Disabled for now
return false;
return false; // Currently deactivated in SVN.
const GekkoOPInfo *a_info = GetOpInfo(a.inst);
const GekkoOPInfo *b_info = GetOpInfo(b.inst);
@ -308,7 +299,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
return false;
// 10 cmpi, 11 cmpli - we got a compare!
switch (b.inst.OPCD)
{
case 16:
@ -323,20 +313,34 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
// For now, only integer ops acceptable.
switch (b_info->type) {
case OPTYPE_INTEGER:
case OPTYPE_LOAD:
case OPTYPE_STORE:
case OPTYPE_LOADFP:
case OPTYPE_STOREFP:
break;
default:
return false;
}
// Check that we have no register collisions.
// That is, check that none of b's outputs matches any of a's inputs,
// and that none of a's outputs matches any of b's inputs.
// The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
bool no_swap = false;
for (int j = 0; j < 3; j++)
{
int regIn = a.regsIn[j];
if (regIn < 0)
continue;
if (b.regsOut[0] == regIn ||
b.regsOut[1] == regIn)
int regInA = a.regsIn[j];
int regInB = b.regsIn[j];
// A negative regsIn entry is treated as "no register in this slot".
// The >= 0 guard has to cover BOTH regsOut comparisons: && binds tighter
// than ||, so without the parentheses the second comparison ran unguarded
// and a negative input slot could be reported as a collision.
if (regInA >= 0 &&
	(b.regsOut[0] == regInA ||
	 b.regsOut[1] == regInA))
{
// reg collision! don't swap
return false;
}
if (regInB >= 0 &&
	(a.regsOut[0] == regInB ||
	 a.regsOut[1] == regInB))
{
// reg collision! don't swap
return false;
@ -346,6 +350,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
return true;
}
// Does not yet perform inlining - although there are plans for that.
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa)
{
int numCycles = 0;
@ -623,9 +628,8 @@ CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa,
}
}
//Scan for CR0 dependency
//assume next block wants CR0 to be safe
// Scan for CR0 dependency
// assume next block wants CR0 to be safe
bool wantsCR0 = true;
bool wantsCR1 = true;
bool wantsPS1 = true;

View File

@ -79,8 +79,6 @@ struct BlockRegStats
void Init();
void Shutdown();
void ShuffleUp(CodeOp *code, int first, int last);
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa);
void LogFunctionCall(u32 addr);

View File

@ -141,8 +141,8 @@ GekkoOPTemplate primarytable[] =
{7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, Interpreter::cmpli, Jit64::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, Interpreter::cmpi, Jit64::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{10, Interpreter::cmpli, Jit64::cmpXi, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, Interpreter::cmpi, Jit64::cmpXi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
@ -283,8 +283,8 @@ GekkoOPTemplate table31[] =
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{0, Interpreter::cmp, Jit64::cmp, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, Interpreter::cmpl, Jit64::cmpl, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{0, Interpreter::cmp, Jit64::cmpX, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, Interpreter::cmpl, Jit64::cmpX, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},

View File

@ -25,77 +25,75 @@ class PointerWrap;
namespace PowerPC
{
enum CoreMode
{
MODE_INTERPRETER,
MODE_JIT,
};
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct GC_ALIGNED64(PowerPCState)
{
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
enum CoreMode
{
MODE_INTERPRETER,
MODE_JIT,
};
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
u64 ps[32][2];
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct GC_ALIGNED64(PowerPCState)
{
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
u32 pc; // program counter
u32 npc;
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
u64 ps[32][2];
u32 cr; // flags
u32 msr; // machine specific register
u32 fpscr; // floating point flags/status bits
u32 pc; // program counter
u32 npc;
// Exception management.
u32 Exceptions;
u32 cr; // flags
u32 msr; // machine specific register
u32 fpscr; // floating point flags/status bits
u32 sr[16]; // Segment registers. Unused.
// Exception management.
u32 Exceptions;
u32 DebugCount;
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
// also for power management, but we don't care about that.
u32 spr[1024];
};
u32 sr[16]; // Segment registers. Unused.
enum CPUState
{
CPU_RUNNING = 0,
CPU_RUNNINGDEBUG = 1,
CPU_STEPPING = 2,
CPU_POWERDOWN = 3,
};
u32 DebugCount;
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
// also for power management, but we don't care about that.
u32 spr[1024];
};
extern PowerPCState ppcState;
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
enum CPUState
{
CPU_RUNNING = 0,
CPU_RUNNINGDEBUG = 1,
CPU_STEPPING = 2,
CPU_POWERDOWN = 3,
};
void Init();
void Shutdown();
void DoState(PointerWrap &p);
extern PowerPCState ppcState;
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
void SetMode(CoreMode _coreType);
void Init();
void Shutdown();
void DoState(PointerWrap &p);
void SingleStep();
void CheckExceptions();
void RunLoop();
void Start();
void Pause();
void Stop();
void SetMode(CoreMode _coreType);
void OnIdle(u32 _uThreadAddr);
}
void SingleStep();
void CheckExceptions();
void RunLoop();
void Start();
void Pause();
void Stop();
// Easy register access macros.
void OnIdle(u32 _uThreadAddr);
// Easy register access macros.
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
#define XER ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER])
#define PC PowerPC::ppcState.pc
#define NPC PowerPC::ppcState.npc
#define CR PowerPC::ppcState.cr
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)
#define MSR PowerPC::ppcState.msr
#define GPR(n) PowerPC::ppcState.gpr[n]
@ -121,11 +119,13 @@ namespace PowerPC
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
} // namespace
// Wrappers that make it easier to completely replace the storage of the CR and carry bits in the
// future with something more x86-friendly. They are not used 100% consistently yet - and if we do
// this, we need the corresponding changes on the JIT side too.
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
inline void SetCRField(int cr_field, int value) {
PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4));
}
@ -135,9 +135,10 @@ inline u32 GetCRField(int cr_field) {
}
inline u32 GetCRBit(int bit) {
return (CR >> (31 - bit)) & 1;
return (PowerPC::ppcState.cr >> (31 - bit)) & 1;
}
// SetCR and GetCR may become fairly slow soon. Should be avoided if possible.
// Replaces the entire condition register image held in ppcState.cr with new_cr.
inline void SetCR(u32 new_cr)
{
	PowerPC::ppcState.cr = new_cr;
}
@ -146,12 +147,29 @@ inline u32 GetCR() {
return PowerPC::ppcState.cr;
}
// SetCarry/GetCarry may speed up soon.
inline void SetCarry(int ca) {
XER.CA = ca;
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
}
inline int GetCarry() {
return XER.CA;
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
}
// Returns a copy of the XER register, read from its slot in ppcState.spr
// and viewed through the UReg_XER layout.
inline UReg_XER GetXER()
{
	UReg_XER &xer = (UReg_XER&)PowerPC::ppcState.spr[SPR_XER];
	return xer;
}
// Stores new_xer into the XER slot of ppcState.spr, replacing the whole register.
inline void SetXER(UReg_XER new_xer)
{
	UReg_XER &xer = (UReg_XER&)PowerPC::ppcState.spr[SPR_XER];
	xer = new_xer;
}
inline int GetXER_SO() {
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
}
inline void SetXER_SO(int value) {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
}
#endif