mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-26 12:35:27 +00:00
Consolidate some compare instructions in JIT, preparations for separate CR flag storage, misc other cleanup in cpu core.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1547 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
4b5cfed314
commit
866d4e6bc8
@ -672,7 +672,8 @@ bool Init()
|
||||
else
|
||||
InitHWMemFuncs();
|
||||
|
||||
LOG(MEMMAP, "Memory system initialized. RAM at %p (0x80000000 @ %p)", base, base + 0x80000000);
|
||||
LOG(MEMMAP, "Memory system initialized. RAM at %p (mirrors at 0 @ %p, 0x80000000 @ %p , 0xC0000000 @ %p)",
|
||||
m_pRAM, m_pPhysicalRAM, m_pVirtualCachedRAM, m_pVirtualUncachedRAM);
|
||||
m_IsInitialized = true;
|
||||
return true;
|
||||
}
|
||||
|
@ -117,7 +117,7 @@ void andis_rc(UGeckoInstruction _inst)
|
||||
|
||||
void cmpi(UGeckoInstruction _inst)
|
||||
{
|
||||
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA]-_inst.SIMM_16);
|
||||
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA] - _inst.SIMM_16);
|
||||
}
|
||||
|
||||
void cmpli(UGeckoInstruction _inst)
|
||||
@ -128,7 +128,7 @@ void cmpli(UGeckoInstruction _inst)
|
||||
if (a < b) f = 0x8;
|
||||
else if (a > b) f = 0x4;
|
||||
else f = 0x2; //equals
|
||||
if (XER.SO) f = 0x1;
|
||||
if (GetXER_SO()) f |= 0x1;
|
||||
SetCRField(_inst.CRFD, f);
|
||||
}
|
||||
|
||||
@ -151,13 +151,12 @@ void subfic(UGeckoInstruction _inst)
|
||||
{
|
||||
/* u32 rra = ~m_GPR[_inst.RA];
|
||||
s32 immediate = (s16)_inst.SIMM_16 + 1;
|
||||
|
||||
|
||||
// #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA)
|
||||
if ((rra + immediate) < rra)
|
||||
XER.CA = 1;
|
||||
SetCarry(1);
|
||||
else
|
||||
XER.CA = 0;
|
||||
SetCarry(0);
|
||||
|
||||
m_GPR[_inst.RD] = rra - immediate;
|
||||
*/
|
||||
@ -227,11 +226,10 @@ void cmp(UGeckoInstruction _inst)
|
||||
s32 a = (s32)m_GPR[_inst.RA];
|
||||
s32 b = (s32)m_GPR[_inst.RB];
|
||||
int fTemp = 0x8; // a < b
|
||||
|
||||
// if (a < b) fTemp = 0x8; else
|
||||
if (a > b) fTemp = 0x4;
|
||||
// if (a < b) fTemp = 0x8; else
|
||||
if (a > b) fTemp = 0x4;
|
||||
else if (a == b) fTemp = 0x2;
|
||||
if (XER.SO) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
|
||||
if (GetXER_SO()) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
|
||||
SetCRField(_inst.CRFD, fTemp);
|
||||
}
|
||||
|
||||
@ -241,10 +239,10 @@ void cmpl(UGeckoInstruction _inst)
|
||||
u32 b = m_GPR[_inst.RB];
|
||||
u32 fTemp = 0x8; // a < b
|
||||
|
||||
// if (a < b) fTemp = 0x8;else
|
||||
if (a > b) fTemp = 0x4;
|
||||
// if (a < b) fTemp = 0x8;else
|
||||
if (a > b) fTemp = 0x4;
|
||||
else if (a == b) fTemp = 0x2;
|
||||
if (XER.SO) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
|
||||
if (GetXER_SO()) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
|
||||
SetCRField(_inst.CRFD, fTemp);
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,11 @@
|
||||
namespace Interpreter
|
||||
{
|
||||
|
||||
// TODO: These should really be in the save state, although it's unlikely to matter much.
|
||||
// They are for lwarx and its friend stwcxd.
|
||||
static bool g_bReserve = false;
|
||||
static u32 g_reserveAddr;
|
||||
|
||||
u32 Helper_Get_EA(const UGeckoInstruction _inst)
|
||||
{
|
||||
return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
|
||||
@ -581,37 +586,32 @@ void stwbrx(UGeckoInstruction _inst)
|
||||
|
||||
|
||||
// The following two instructions are for SMP communications. On a single
|
||||
// CPU, they cannot fail unless an interrupt happens in between, which usually
|
||||
// won't happen with the JIT.
|
||||
bool g_bReserve = false;
|
||||
u32 g_reserveAddr;
|
||||
// CPU, they cannot fail unless an interrupt happens in between.
|
||||
|
||||
void lwarx(UGeckoInstruction _inst)
|
||||
{
|
||||
u32 uAddress = Helper_Get_EA_X(_inst);
|
||||
|
||||
u32 uAddress = Helper_Get_EA_X(_inst);
|
||||
m_GPR[_inst.RD] = Memory::Read_U32(uAddress);
|
||||
g_bReserve = true;
|
||||
g_reserveAddr = uAddress;
|
||||
|
||||
g_bReserve = true;
|
||||
g_reserveAddr = uAddress;
|
||||
}
|
||||
|
||||
void stwcxd(UGeckoInstruction _inst)
|
||||
{
|
||||
// Stores Word Conditional indeXed
|
||||
|
||||
u32 uAddress;
|
||||
|
||||
if(g_bReserve) {
|
||||
// Stores Word Conditional indeXed
|
||||
u32 uAddress;
|
||||
if (g_bReserve) {
|
||||
uAddress = Helper_Get_EA_X(_inst);
|
||||
if(uAddress == g_reserveAddr) {
|
||||
if (uAddress == g_reserveAddr) {
|
||||
Memory::Write_U32(m_GPR[_inst.RS], uAddress);
|
||||
g_bReserve = false;
|
||||
SetCRField(0, 2 | XER.SO);
|
||||
return;
|
||||
}
|
||||
}
|
||||
g_bReserve = false;
|
||||
SetCRField(0, 2 | GetXER_SO());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SetCRField(0, XER.SO);
|
||||
SetCRField(0, GetXER_SO());
|
||||
}
|
||||
|
||||
void stwux(UGeckoInstruction _inst)
|
||||
|
@ -229,7 +229,6 @@ void ps_cmpu1(UGeckoInstruction _inst)
|
||||
if (fa < fb) compareResult = 8;
|
||||
else if (fa > fb) compareResult = 4;
|
||||
else compareResult = 2;
|
||||
|
||||
SetCRField(_inst.CRFD, compareResult);
|
||||
}
|
||||
|
||||
|
@ -226,8 +226,9 @@ void mtfsfx(UGeckoInstruction _inst)
|
||||
|
||||
void mcrxr(UGeckoInstruction _inst)
|
||||
{
|
||||
SetCRField(_inst.CRFD, XER.Hex >> 28);
|
||||
XER.Hex &= ~0xF0000000; // clear 0-3
|
||||
// USES_XER
|
||||
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
|
||||
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
|
||||
}
|
||||
|
||||
void mfcr(UGeckoInstruction _inst)
|
||||
|
@ -385,6 +385,7 @@ namespace Jit64
|
||||
js.instructionNumber = i;
|
||||
if (i == (int)size - 1) {
|
||||
js.isLastInstruction = true;
|
||||
js.next_inst = 0;
|
||||
if (Profiler::g_ProfileBlocks) {
|
||||
// CAUTION!!! push on stack regs you use, do your stuff, then pop
|
||||
PROFILER_VPUSH;
|
||||
@ -394,6 +395,9 @@ namespace Jit64
|
||||
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
|
||||
PROFILER_VPOP;
|
||||
}
|
||||
} else {
|
||||
// help peephole optimizations
|
||||
js.next_inst = ops[i + 1].inst;
|
||||
}
|
||||
|
||||
// const GekkoOpInfo *info = GetOpInfo();
|
||||
|
@ -49,6 +49,7 @@ namespace Jit64
|
||||
{
|
||||
u32 compilerPC;
|
||||
u32 blockStart;
|
||||
UGeckoInstruction next_inst; // for easy peephole opt.
|
||||
int blockSize;
|
||||
int instructionNumber;
|
||||
int downcountAmount;
|
||||
@ -142,10 +143,8 @@ namespace Jit64
|
||||
void fcmpx(UGeckoInstruction inst);
|
||||
void fmrx(UGeckoInstruction inst);
|
||||
|
||||
void cmpli(UGeckoInstruction inst);
|
||||
void cmpi(UGeckoInstruction inst);
|
||||
void cmpl(UGeckoInstruction inst);
|
||||
void cmp(UGeckoInstruction inst);
|
||||
void cmpXi(UGeckoInstruction inst);
|
||||
void cmpX(UGeckoInstruction inst);
|
||||
|
||||
void cntlzwx(UGeckoInstruction inst);
|
||||
|
||||
|
@ -324,18 +324,19 @@ void GenFifoXmm64Write()
|
||||
|
||||
void GenerateCommon()
|
||||
{
|
||||
// USES_CR
|
||||
computeRc = AlignCode16();
|
||||
AND(32, M(&CR), Imm32(0x0FFFFFFF));
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(0x0FFFFFFF));
|
||||
CMP(32, R(EAX), Imm8(0));
|
||||
FixupBranch pLesser = J_CC(CC_L);
|
||||
FixupBranch pGreater = J_CC(CC_G);
|
||||
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x20000000)); // _x86Reg == 0
|
||||
RET();
|
||||
SetJumpTarget(pGreater);
|
||||
OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x40000000)); // _x86Reg > 0
|
||||
RET();
|
||||
SetJumpTarget(pLesser);
|
||||
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000)); // _x86Reg < 0
|
||||
RET();
|
||||
|
||||
fifoDirectWrite8 = AlignCode4();
|
||||
|
@ -108,6 +108,7 @@ namespace Jit64
|
||||
// variants of this instruction.
|
||||
void bcx(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_CR
|
||||
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
@ -124,7 +125,7 @@ namespace Jit64
|
||||
|
||||
if ((inst.BO & 16) == 0) // Test a CR bit
|
||||
{
|
||||
TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI));
|
||||
TEST(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000 >> inst.BI));
|
||||
if (inst.BO & 8) // Conditional branch
|
||||
branch = CC_NZ;
|
||||
else
|
||||
@ -181,14 +182,14 @@ namespace Jit64
|
||||
{
|
||||
skip = J_CC(branch);
|
||||
}
|
||||
u32 destination;
|
||||
if (inst.LK)
|
||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
||||
if(inst.AA)
|
||||
destination = SignExt16(inst.BD << 2);
|
||||
else
|
||||
destination = js.compilerPC + SignExt16(inst.BD << 2);
|
||||
WriteExit(destination, 0);
|
||||
u32 destination;
|
||||
if (inst.LK)
|
||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
||||
if(inst.AA)
|
||||
destination = SignExt16(inst.BD << 2);
|
||||
else
|
||||
destination = js.compilerPC + SignExt16(inst.BD << 2);
|
||||
WriteExit(destination, 0);
|
||||
if (inst.BO != 20)
|
||||
{
|
||||
SetJumpTarget(skip);
|
||||
|
@ -205,8 +205,8 @@ namespace Jit64
|
||||
{
|
||||
fpr.LoadToX64(a, true);
|
||||
}
|
||||
|
||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> shift)));
|
||||
// USES_CR
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> shift)));
|
||||
if (ordered)
|
||||
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
||||
else
|
||||
@ -226,7 +226,7 @@ namespace Jit64
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
SHR(32, R(EAX), Imm8(shift));
|
||||
OR(32, M(&CR), R(EAX));
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "Jit_Util.h"
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
@ -32,10 +33,11 @@ namespace Jit64
|
||||
{
|
||||
// Assumes that the flags were just set through an addition.
|
||||
void GenerateCarry(X64Reg temp_reg) {
|
||||
// USES_XER
|
||||
SETcc(CC_C, R(temp_reg));
|
||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||
SHL(32, R(temp_reg), Imm8(29));
|
||||
OR(32, M(&XER), R(temp_reg));
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg));
|
||||
}
|
||||
|
||||
typedef u32 (*Operation)(u32 a, u32 b);
|
||||
@ -133,26 +135,49 @@ namespace Jit64
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
if (js.next_inst.OPCD == 16) { // bcx
|
||||
if (!js.next_inst.LK && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG))
|
||||
{
|
||||
// it's clear there's plenty of opportunity.
|
||||
//PanicAlert("merge");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// unsigned
|
||||
void cmpli(UGeckoInstruction inst)
|
||||
void cmpXi(UGeckoInstruction inst)
|
||||
{
|
||||
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
||||
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
||||
// towards branches.
|
||||
// USES_CR
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
#endif
|
||||
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
||||
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
||||
// towards branches.
|
||||
INSTRUCTION_START;
|
||||
int a = inst.RA;
|
||||
u32 uimm = inst.UIMM;
|
||||
int crf = inst.CRFD;
|
||||
int shift = crf * 4;
|
||||
Gen::CCFlags less_than, greater_than;
|
||||
OpArg comparand;
|
||||
if (inst.OPCD == 10) {
|
||||
less_than = CC_B;
|
||||
greater_than = CC_A;
|
||||
comparand = Imm32(inst.UIMM);
|
||||
} else {
|
||||
less_than = CC_L;
|
||||
greater_than = CC_G;
|
||||
comparand = Imm32((s32)(s16)inst.UIMM);
|
||||
}
|
||||
|
||||
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), Imm32(uimm));
|
||||
FixupBranch pLesser = J_CC(CC_B);
|
||||
FixupBranch pGreater = J_CC(CC_A);
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
@ -165,44 +190,17 @@ namespace Jit64
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&CR), R(EAX));
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
|
||||
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
|
||||
// Since it's the last block, some liberties can be taken.
|
||||
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
|
||||
}
|
||||
|
||||
// signed
|
||||
void cmpi(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
#endif
|
||||
INSTRUCTION_START;
|
||||
int a = inst.RA;
|
||||
s32 simm = (s32)(s16)inst.UIMM;
|
||||
int crf = inst.CRFD;
|
||||
int shift = crf * 4;
|
||||
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), Imm32(simm));
|
||||
FixupBranch pLesser = J_CC(CC_L);
|
||||
FixupBranch pGreater = J_CC(CC_G);
|
||||
// _x86Reg == 0
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
||||
FixupBranch continue1 = J();
|
||||
// _x86Reg > 0
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
|
||||
FixupBranch continue2 = J();
|
||||
// _x86Reg < 0
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&CR), R(EAX));
|
||||
}
|
||||
|
||||
// signed
|
||||
void cmp(UGeckoInstruction inst)
|
||||
void cmpX(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_CR
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
@ -212,12 +210,21 @@ namespace Jit64
|
||||
int b = inst.RB;
|
||||
int crf = inst.CRFD;
|
||||
int shift = crf * 4;
|
||||
Gen::CCFlags less_than, greater_than;
|
||||
Gen::OpArg comparand = gpr.R(b);
|
||||
if (inst.SUBOP10 == 32) {
|
||||
less_than = CC_B;
|
||||
greater_than = CC_A;
|
||||
} else {
|
||||
less_than = CC_L;
|
||||
greater_than = CC_G;
|
||||
}
|
||||
gpr.Lock(a, b);
|
||||
gpr.LoadToX64(a, true, false);
|
||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), gpr.R(b));
|
||||
FixupBranch pLesser = J_CC(CC_L);
|
||||
FixupBranch pGreater = J_CC(CC_G);
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
// _x86Reg == 0
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
||||
FixupBranch continue1 = J();
|
||||
@ -230,41 +237,7 @@ namespace Jit64
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&CR), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
// unsigned
|
||||
void cmpl(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
#endif
|
||||
INSTRUCTION_START;
|
||||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
int crf = inst.CRFD;
|
||||
int shift = crf * 4;
|
||||
gpr.Lock(a, b);
|
||||
gpr.LoadToX64(a, true, false);
|
||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), gpr.R(b));
|
||||
FixupBranch pLesser = J_CC(CC_B);
|
||||
FixupBranch pGreater = J_CC(CC_A);
|
||||
// _x86Reg == 0
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
||||
FixupBranch continue1 = J();
|
||||
// _x86Reg > 0
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
|
||||
FixupBranch continue2 = J();
|
||||
// _x86Reg < 0
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&CR), R(EAX));
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
@ -652,6 +625,7 @@ namespace Jit64
|
||||
// This can be optimized
|
||||
void addex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
@ -664,7 +638,7 @@ namespace Jit64
|
||||
gpr.LoadToX64(d, false);
|
||||
else
|
||||
gpr.LoadToX64(d, true);
|
||||
MOV(32, R(EAX), M(&XER));
|
||||
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
|
||||
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
ADC(32, R(EAX), gpr.R(b));
|
||||
@ -895,6 +869,7 @@ namespace Jit64
|
||||
|
||||
void srawx(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
#ifdef JIT_OFF_OPTIONS
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
@ -919,17 +894,17 @@ namespace Jit64
|
||||
CMP(32, R(EAX), Imm32(-1));
|
||||
SETcc(CC_L, R(EAX));
|
||||
SAR(32, gpr.R(a), R(ECX));
|
||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||
SHL(32, R(EAX), Imm8(29));
|
||||
OR(32, M(&XER), R(EAX));
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
|
||||
FixupBranch end = J();
|
||||
SetJumpTarget(topBitSet);
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
SAR(32, R(EAX), Imm8(31));
|
||||
MOV(32, gpr.R(a), R(EAX));
|
||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||
AND(32, R(EAX), Imm32(1<<29));
|
||||
OR(32, M(&XER), R(EAX));
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
|
||||
SetJumpTarget(end);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
@ -961,11 +936,11 @@ namespace Jit64
|
||||
FixupBranch nocarry1 = J_CC(CC_GE);
|
||||
TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out?
|
||||
FixupBranch nocarry2 = J_CC(CC_Z);
|
||||
OR(32, M(&XER), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||
JitSetCA();
|
||||
FixupBranch carry = J(false);
|
||||
SetJumpTarget(nocarry1);
|
||||
SetJumpTarget(nocarry2);
|
||||
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
JitClearCA();
|
||||
SetJumpTarget(carry);
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
@ -973,7 +948,7 @@ namespace Jit64
|
||||
{
|
||||
Default(inst); return;
|
||||
gpr.Lock(a, s);
|
||||
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
JitClearCA();
|
||||
gpr.LoadToX64(a, a == s, true);
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
|
@ -37,6 +37,16 @@
|
||||
namespace Jit64
|
||||
{
|
||||
|
||||
// JIT helper: issues an AND against the emulated XER register
// (PowerPC::ppcState.spr[SPR_XER]) with ~XER_CA_MASK, clearing the carry (CA) bit
// and leaving every other XER bit untouched.
void JitClearCA()
|
||||
{
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
}
|
||||
|
||||
// JIT helper: issues an OR against the emulated XER register
// (PowerPC::ppcState.spr[SPR_XER]) with XER_CA_MASK, setting the carry (CA) bit
// and leaving every other XER bit untouched.
void JitSetCA()
|
||||
{
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||
}
|
||||
|
||||
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
|
@ -33,4 +33,7 @@ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
void ForceSinglePrecisionS(X64Reg xmm);
|
||||
void ForceSinglePrecisionP(X64Reg xmm);
|
||||
|
||||
void JitClearCA();
|
||||
void JitSetCA();
|
||||
|
||||
} // namespace
|
||||
|
@ -285,19 +285,10 @@ void FixUpInternalBranches(CodeOp *code, int begin, int end)
|
||||
}
|
||||
}
|
||||
|
||||
// Rotates the range code[first..last] (inclusive) left by one position:
// every element after `first` moves down one index and the element that was
// at `first` ends up at `last`. Does nothing observable when first >= last
// other than rewriting code[last] with code[first].
void ShuffleUp(CodeOp *code, int first, int last)
|
||||
{
|
||||
// Save the element that is about to be overwritten by the shift.
CodeOp temp = code[first];
|
||||
for (int i = first; i < last; i++)
|
||||
code[i] = code[i + 1];
|
||||
// Re-insert the saved element at the end of the range.
code[last] = temp;
|
||||
}
|
||||
|
||||
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
|
||||
bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
{
|
||||
// Disabled for now
|
||||
return false;
|
||||
return false; // Currently deactivated in SVN.
|
||||
|
||||
const GekkoOPInfo *a_info = GetOpInfo(a.inst);
|
||||
const GekkoOPInfo *b_info = GetOpInfo(b.inst);
|
||||
@ -308,7 +299,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
|
||||
return false;
|
||||
|
||||
// 10 cmpi, 11 cmpli - we got a compare!
|
||||
switch (b.inst.OPCD)
|
||||
{
|
||||
case 16:
|
||||
@ -323,20 +313,34 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
// For now, only integer ops acceptable.
|
||||
switch (b_info->type) {
|
||||
case OPTYPE_INTEGER:
|
||||
case OPTYPE_LOAD:
|
||||
case OPTYPE_STORE:
|
||||
case OPTYPE_LOADFP:
|
||||
case OPTYPE_STOREFP:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that we have no register collisions.
|
||||
// That is, check that none of b's outputs matches any of a's inputs,
|
||||
// and that none of a's outputs matches any of b's inputs.
|
||||
// The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
|
||||
bool no_swap = false;
|
||||
for (int j = 0; j < 3; j++)
|
||||
{
|
||||
int regIn = a.regsIn[j];
|
||||
if (regIn < 0)
|
||||
continue;
|
||||
if (b.regsOut[0] == regIn ||
|
||||
b.regsOut[1] == regIn)
|
||||
int regInA = a.regsIn[j];
|
||||
int regInB = b.regsIn[j];
|
||||
if (regInA >= 0 &&
|
||||
b.regsOut[0] == regInA ||
|
||||
b.regsOut[1] == regInA)
|
||||
{
|
||||
// reg collision! don't swap
|
||||
return false;
|
||||
}
|
||||
if (regInB >= 0 &&
|
||||
a.regsOut[0] == regInB ||
|
||||
a.regsOut[1] == regInB)
|
||||
{
|
||||
// reg collision! don't swap
|
||||
return false;
|
||||
@ -346,6 +350,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Does not yet perform inlining - although there are plans for that.
|
||||
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa)
|
||||
{
|
||||
int numCycles = 0;
|
||||
@ -623,9 +628,8 @@ CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Scan for CR0 dependency
|
||||
//assume next block wants CR0 to be safe
|
||||
// Scan for CR0 dependency
|
||||
// assume next block wants CR0 to be safe
|
||||
bool wantsCR0 = true;
|
||||
bool wantsCR1 = true;
|
||||
bool wantsPS1 = true;
|
||||
|
@ -79,8 +79,6 @@ struct BlockRegStats
|
||||
void Init();
|
||||
void Shutdown();
|
||||
|
||||
void ShuffleUp(CodeOp *code, int first, int last);
|
||||
|
||||
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa);
|
||||
|
||||
void LogFunctionCall(u32 addr);
|
||||
|
@ -141,8 +141,8 @@ GekkoOPTemplate primarytable[] =
|
||||
|
||||
{7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
|
||||
{8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
||||
{10, Interpreter::cmpli, Jit64::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{11, Interpreter::cmpi, Jit64::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{10, Interpreter::cmpli, Jit64::cmpXi, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{11, Interpreter::cmpi, Jit64::cmpXi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
||||
{13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
|
||||
{14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
|
||||
@ -283,8 +283,8 @@ GekkoOPTemplate table31[] =
|
||||
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||
{0, Interpreter::cmp, Jit64::cmp, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||
{32, Interpreter::cmpl, Jit64::cmpl, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||
{0, Interpreter::cmp, Jit64::cmpX, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||
{32, Interpreter::cmpl, Jit64::cmpX, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||
{26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
|
@ -25,77 +25,75 @@ class PointerWrap;
|
||||
|
||||
namespace PowerPC
|
||||
{
|
||||
enum CoreMode
|
||||
{
|
||||
MODE_INTERPRETER,
|
||||
MODE_JIT,
|
||||
};
|
||||
|
||||
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
|
||||
struct GC_ALIGNED64(PowerPCState)
|
||||
{
|
||||
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
|
||||
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
|
||||
enum CoreMode
|
||||
{
|
||||
MODE_INTERPRETER,
|
||||
MODE_JIT,
|
||||
};
|
||||
|
||||
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
|
||||
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
|
||||
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
|
||||
u64 ps[32][2];
|
||||
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
|
||||
struct GC_ALIGNED64(PowerPCState)
|
||||
{
|
||||
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
|
||||
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
|
||||
|
||||
u32 pc; // program counter
|
||||
u32 npc;
|
||||
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
|
||||
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
|
||||
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
|
||||
u64 ps[32][2];
|
||||
|
||||
u32 cr; // flags
|
||||
u32 msr; // machine specific register
|
||||
u32 fpscr; // floating point flags/status bits
|
||||
u32 pc; // program counter
|
||||
u32 npc;
|
||||
|
||||
// Exception management.
|
||||
u32 Exceptions;
|
||||
u32 cr; // flags
|
||||
u32 msr; // machine specific register
|
||||
u32 fpscr; // floating point flags/status bits
|
||||
|
||||
u32 sr[16]; // Segment registers. Unused.
|
||||
// Exception management.
|
||||
u32 Exceptions;
|
||||
|
||||
u32 DebugCount;
|
||||
|
||||
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
|
||||
// also for power management, but we don't care about that.
|
||||
u32 spr[1024];
|
||||
};
|
||||
u32 sr[16]; // Segment registers. Unused.
|
||||
|
||||
enum CPUState
|
||||
{
|
||||
CPU_RUNNING = 0,
|
||||
CPU_RUNNINGDEBUG = 1,
|
||||
CPU_STEPPING = 2,
|
||||
CPU_POWERDOWN = 3,
|
||||
};
|
||||
u32 DebugCount;
|
||||
|
||||
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
|
||||
// also for power management, but we don't care about that.
|
||||
u32 spr[1024];
|
||||
};
|
||||
|
||||
extern PowerPCState ppcState;
|
||||
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
|
||||
enum CPUState
|
||||
{
|
||||
CPU_RUNNING = 0,
|
||||
CPU_RUNNINGDEBUG = 1,
|
||||
CPU_STEPPING = 2,
|
||||
CPU_POWERDOWN = 3,
|
||||
};
|
||||
|
||||
void Init();
|
||||
void Shutdown();
|
||||
void DoState(PointerWrap &p);
|
||||
extern PowerPCState ppcState;
|
||||
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
|
||||
|
||||
void SetMode(CoreMode _coreType);
|
||||
void Init();
|
||||
void Shutdown();
|
||||
void DoState(PointerWrap &p);
|
||||
|
||||
void SingleStep();
|
||||
void CheckExceptions();
|
||||
void RunLoop();
|
||||
void Start();
|
||||
void Pause();
|
||||
void Stop();
|
||||
void SetMode(CoreMode _coreType);
|
||||
|
||||
void OnIdle(u32 _uThreadAddr);
|
||||
}
|
||||
void SingleStep();
|
||||
void CheckExceptions();
|
||||
void RunLoop();
|
||||
void Start();
|
||||
void Pause();
|
||||
void Stop();
|
||||
|
||||
// Easy register access macros.
|
||||
void OnIdle(u32 _uThreadAddr);
|
||||
|
||||
// Easy register access macros.
|
||||
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
|
||||
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
|
||||
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
|
||||
#define XER ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER])
|
||||
#define PC PowerPC::ppcState.pc
|
||||
#define NPC PowerPC::ppcState.npc
|
||||
#define CR PowerPC::ppcState.cr
|
||||
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)
|
||||
#define MSR PowerPC::ppcState.msr
|
||||
#define GPR(n) PowerPC::ppcState.gpr[n]
|
||||
@ -121,11 +119,13 @@ namespace PowerPC
|
||||
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
|
||||
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
|
||||
|
||||
} // namespace
|
||||
|
||||
// Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits
|
||||
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
|
||||
// need the corresponding stuff on the JIT side too.
|
||||
|
||||
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
|
||||
inline void SetCRField(int cr_field, int value) {
|
||||
PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4));
|
||||
}
|
||||
@ -135,9 +135,10 @@ inline u32 GetCRField(int cr_field) {
|
||||
}
|
||||
|
||||
inline u32 GetCRBit(int bit) {
|
||||
return (CR >> (31 - bit)) & 1;
|
||||
return (PowerPC::ppcState.cr >> (31 - bit)) & 1;
|
||||
}
|
||||
|
||||
// SetCR and GetCR may become fairly slow soon. Should be avoided if possible.
|
||||
inline void SetCR(u32 new_cr) {
|
||||
PowerPC::ppcState.cr = new_cr;
|
||||
}
|
||||
@ -146,12 +147,29 @@ inline u32 GetCR() {
|
||||
return PowerPC::ppcState.cr;
|
||||
}
|
||||
|
||||
// SetCarry/GetCarry may speed up soon.
|
||||
inline void SetCarry(int ca) {
|
||||
XER.CA = ca;
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
|
||||
}
|
||||
|
||||
inline int GetCarry() {
|
||||
return XER.CA;
|
||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
|
||||
}
|
||||
|
||||
// Returns a copy of the emulated XER register (spr[SPR_XER]) viewed as a UReg_XER.
inline UReg_XER GetXER() {
|
||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]);
|
||||
}
|
||||
|
||||
// Overwrites the emulated XER register (spr[SPR_XER]) with new_xer.
inline void SetXER(UReg_XER new_xer) {
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer;
|
||||
}
|
||||
|
||||
// Returns the SO field of the emulated XER register (spr[SPR_XER]) as an int.
inline int GetXER_SO() {
|
||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
|
||||
}
|
||||
|
||||
// Stores value into the SO field of the emulated XER register (spr[SPR_XER]).
inline void SetXER_SO(int value) {
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user