mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-04 15:40:02 +00:00
Merge pull request #1021 from FioraAeterna/optimizeca3
JIT: Carry optimizations!
This commit is contained in:
commit
db7617248f
@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
|
||||
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}},
|
||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
|
||||
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||
|
||||
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
|
||||
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
|
||||
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
||||
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
|
||||
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
||||
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
||||
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}},
|
||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
|
||||
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||
|
||||
@ -280,26 +280,26 @@ static GekkoOPTemplate table31[] =
|
||||
static GekkoOPTemplate table31_2[] =
|
||||
{
|
||||
{266, Interpreter::addx, {"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{10, Interpreter::addcx, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{138, Interpreter::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
|
||||
{234, Interpreter::addmex, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{202, Interpreter::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{491, Interpreter::divwx, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
|
||||
{1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
|
||||
{1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
|
||||
{459, Interpreter::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
|
||||
{971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
|
||||
{971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
|
||||
{75, Interpreter::mulhwx, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
|
||||
{11, Interpreter::mulhwux, {"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
|
||||
{235, Interpreter::mullwx, {"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
|
||||
{747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
|
||||
{747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 5, 0, 0, 0}},
|
||||
{104, Interpreter::negx, {"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{40, Interpreter::subfx, {"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
|
||||
{8, Interpreter::subfcx, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
|
||||
{136, Interpreter::subfex, {"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{232, Interpreter::subfmex, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{200, Interpreter::subfzex, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
|
@ -178,6 +178,8 @@ void Jit64::Init()
|
||||
code_block.m_gpa = &js.gpa;
|
||||
code_block.m_fpa = &js.fpa;
|
||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
|
||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
||||
}
|
||||
|
||||
void Jit64::ClearCache()
|
||||
@ -461,6 +463,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
||||
|
||||
js.skipnext = false;
|
||||
js.carryFlagSet = false;
|
||||
js.carryFlagInverted = false;
|
||||
js.compilerPC = nextPC;
|
||||
// Translate instructions
|
||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||
@ -492,6 +496,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||
// help peephole optimizations
|
||||
js.next_inst = ops[i + 1].inst;
|
||||
js.next_compilerPC = ops[i + 1].address;
|
||||
js.next_op = &ops[i + 1];
|
||||
}
|
||||
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||
|
@ -101,6 +101,8 @@ public:
|
||||
void GenerateConstantOverflow(s64 val);
|
||||
void GenerateOverflow();
|
||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void FinalizeCarry(Gen::CCFlags cond);
|
||||
void FinalizeCarry(bool ca);
|
||||
void ComputeRC(const Gen::OpArg & arg);
|
||||
|
||||
// Use to extract bytes from a register using the regcache. offset is in bytes.
|
||||
@ -139,7 +141,7 @@ public:
|
||||
void DynaRunTable63(UGeckoInstruction _inst);
|
||||
|
||||
void addx(UGeckoInstruction inst);
|
||||
void addcx(UGeckoInstruction inst);
|
||||
void arithcx(UGeckoInstruction inst);
|
||||
void mulli(UGeckoInstruction inst);
|
||||
void mulhwXx(UGeckoInstruction inst);
|
||||
void mullwx(UGeckoInstruction inst);
|
||||
@ -147,9 +149,7 @@ public:
|
||||
void divwx(UGeckoInstruction inst);
|
||||
void srawix(UGeckoInstruction inst);
|
||||
void srawx(UGeckoInstruction inst);
|
||||
void addex(UGeckoInstruction inst);
|
||||
void addmex(UGeckoInstruction inst);
|
||||
void addzex(UGeckoInstruction inst);
|
||||
void arithXex(UGeckoInstruction inst);
|
||||
|
||||
void extsXx(UGeckoInstruction inst);
|
||||
|
||||
@ -217,11 +217,7 @@ public:
|
||||
void dcbz(UGeckoInstruction inst);
|
||||
|
||||
void subfic(UGeckoInstruction inst);
|
||||
void subfcx(UGeckoInstruction inst);
|
||||
void subfx(UGeckoInstruction inst);
|
||||
void subfex(UGeckoInstruction inst);
|
||||
void subfmex(UGeckoInstruction inst);
|
||||
void subfzex(UGeckoInstruction inst);
|
||||
|
||||
void twx(UGeckoInstruction inst);
|
||||
|
||||
|
@ -48,7 +48,7 @@ static GekkoOPTemplate primarytable[] =
|
||||
{10, &Jit64::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{11, &Jit64::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||
{12, &Jit64::reg_imm}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
||||
{13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
|
||||
{13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0}},
|
||||
{14, &Jit64::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
|
||||
{15, &Jit64::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
|
||||
|
||||
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
|
||||
{922, &Jit64::extsXx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{954, &Jit64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
|
||||
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
||||
@ -273,7 +273,7 @@ static GekkoOPTemplate table31[] =
|
||||
{339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
|
||||
{467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
|
||||
{371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
|
||||
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}},
|
||||
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA}},
|
||||
{595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
|
||||
{659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
|
||||
|
||||
@ -294,12 +294,12 @@ static GekkoOPTemplate table31_2[] =
|
||||
{
|
||||
{266, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
|
||||
{778, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
|
||||
{10, &Jit64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{522, &Jit64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{138, &Jit64::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{650, &Jit64::addex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{234, &Jit64::addmex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{202, &Jit64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{10, &Jit64::arithcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{522, &Jit64::arithcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{138, &Jit64::arithXex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{650, &Jit64::arithXex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{234, &Jit64::arithXex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{202, &Jit64::arithXex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{491, &Jit64::divwx}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
||||
{1003, &Jit64::divwx}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
||||
{459, &Jit64::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
|
||||
@ -311,11 +311,11 @@ static GekkoOPTemplate table31_2[] =
|
||||
{104, &Jit64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
|
||||
{40, &Jit64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
|
||||
{552, &Jit64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
|
||||
{8, &Jit64::subfcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{520, &Jit64::subfcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{136, &Jit64::subfex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{232, &Jit64::subfmex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{200, &Jit64::subfzex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{8, &Jit64::arithcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{520, &Jit64::arithcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
|
||||
{136, &Jit64::arithXex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{232, &Jit64::arithXex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
{200, &Jit64::arithXex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
|
||||
};
|
||||
|
||||
static GekkoOPTemplate table59[] =
|
||||
|
@ -44,28 +44,76 @@ void Jit64::GenerateOverflow()
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// Finalizes the carry (CA) result of the op being compiled, where `cond` is the x86
// condition code that is true when CA should be 1.
// Always resets js.carryFlagSet / js.carryFlagInverted first. If js.op->wantsCA is
// false, nothing else is emitted or recorded.
// NOTE(review): js.next_op is dereferenced without any validity check here — confirm
// it always points at a valid CodeOp (e.g. for the last instruction of a block).
void Jit64::FinalizeCarry(CCFlags cond)
|
||||
{
|
||||
js.carryFlagSet = false;
|
||||
js.carryFlagInverted = false;
|
||||
if (js.op->wantsCA)
|
||||
{
|
||||
// Presumably: the following op can consume the carry straight from the x86 flags,
// so XER is not written on this path — TODO confirm against the analyzer.
if (js.next_op->wantsCAInFlags)
|
||||
{
|
||||
// cond is already expressed in terms of the x86 carry flag: emit no code and
// only remember whether the flag has the opposite sense (CC_NC).
if (cond == CC_C || cond == CC_NC)
|
||||
{
|
||||
js.carryFlagInverted = cond == CC_NC;
|
||||
}
|
||||
else
|
||||
{
|
||||
// convert the condition to a carry flag (is there a better way?)
// SETcc materializes cond as 0/1 in RSCRATCH; BT then tests bit 0 of it,
// which places that bit in the x86 carry flag.
|
||||
SETcc(cond, R(RSCRATCH));
|
||||
BT(8, R(RSCRATCH), Imm8(0));
|
||||
}
|
||||
// Tell the next op that the carry is available in the flags.
js.carryFlagSet = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback: have JitSetCAIf store the carry according to cond.
JitSetCAIf(cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unconditional version
// Same bookkeeping as FinalizeCarry(CCFlags), but for a carry whose value `ca` is
// already known when the code is generated.
// Always resets js.carryFlagSet / js.carryFlagInverted; does nothing more when
// js.op->wantsCA is false.
// NOTE(review): js.next_op is dereferenced without any validity check here — confirm
// it always points at a valid CodeOp (e.g. for the last instruction of a block).
|
||||
void Jit64::FinalizeCarry(bool ca)
|
||||
{
|
||||
js.carryFlagSet = false;
|
||||
js.carryFlagInverted = false;
|
||||
if (js.op->wantsCA)
|
||||
{
|
||||
// Presumably: the following op reads the carry from the x86 flags, so emit
// STC/CLC to force the carry flag instead of touching XER — TODO confirm.
if (js.next_op->wantsCAInFlags)
|
||||
{
|
||||
if (ca)
|
||||
STC();
|
||||
else
|
||||
CLC();
|
||||
// The flag holds the carry in its normal (non-inverted) sense.
js.carryFlagSet = true;
|
||||
}
|
||||
else if (ca)
|
||||
{
|
||||
// Carry is known to be 1 and is not kept in the flags.
JitSetCA();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Carry is known to be 0: request a CA clear only (ca = true, oe = false).
JitClearCAOV(true, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assumes CA,OV are clear
|
||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||
{
|
||||
// USES_XER
|
||||
if (oe)
|
||||
{
|
||||
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
|
||||
// sides of the branch.
|
||||
// Make sure not to lose the carry flags (not a big deal, this path is rare).
|
||||
PUSHF();
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(jno);
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Do carry
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
POPF();
|
||||
}
|
||||
// Do carry
|
||||
FinalizeCarry(inv ? CC_NC : CC_C);
|
||||
}
|
||||
|
||||
void Jit64::ComputeRC(const Gen::OpArg & arg)
|
||||
@ -129,10 +177,10 @@ static u32 Xor(u32 a, u32 b)
|
||||
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
||||
{
|
||||
gpr.Lock(d, a);
|
||||
if (a || binary || carry) // yeh nasty special case addic
|
||||
// Be careful; addic treats r0 as r0, but addi treats r0 as zero.
|
||||
if (a || binary || carry)
|
||||
{
|
||||
if (carry)
|
||||
JitClearCAOV(false);
|
||||
carry &= js.op->wantsCA;
|
||||
if (gpr.R(a).IsImm() && !carry)
|
||||
{
|
||||
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
|
||||
@ -156,7 +204,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
|
||||
}
|
||||
}
|
||||
if (carry)
|
||||
JitSetCAIf(CC_C);
|
||||
FinalizeCarry(CC_C);
|
||||
if (Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
}
|
||||
@ -239,6 +287,9 @@ void Jit64::reg_imm(UGeckoInstruction inst)
|
||||
|
||||
bool Jit64::CheckMergedBranch(int crf)
|
||||
{
|
||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
|
||||
return false;
|
||||
|
||||
const UGeckoInstruction& next = js.next_inst;
|
||||
return (((next.OPCD == 16 /* bcx */) ||
|
||||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
|
||||
@ -721,148 +772,36 @@ void Jit64::subfic(UGeckoInstruction inst)
|
||||
{
|
||||
if (imm == 0)
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
// Flags act exactly like subtracting from 0
|
||||
NEG(32, gpr.R(d));
|
||||
// Output carry is inverted
|
||||
JitSetCAIf(CC_NC);
|
||||
FinalizeCarry(CC_NC);
|
||||
}
|
||||
else if (imm == -1)
|
||||
{
|
||||
// CA is always set in this case
|
||||
JitSetCA();
|
||||
NOT(32, gpr.R(d));
|
||||
// CA is always set in this case
|
||||
FinalizeCarry(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
NOT(32, gpr.R(d));
|
||||
ADD(32, gpr.R(d), Imm32(imm+1));
|
||||
// Output carry is normal
|
||||
JitSetCAIf(CC_C);
|
||||
FinalizeCarry(CC_C);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
MOV(32, gpr.R(d), Imm32(imm));
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
// Output carry is inverted
|
||||
JitSetCAIf(CC_NC);
|
||||
FinalizeCarry(CC_NC);
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
// This instruction has no RC flag
|
||||
}
|
||||
|
||||
void Jit64::subfcx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
|
||||
JitClearCAOV(inst.OE);
|
||||
if (d == b)
|
||||
{
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
else if (d == a)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SUB(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
FinalizeCarryOverflow(inst.OE, true);
|
||||
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::subfex(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
|
||||
bool invertedCarry = false;
|
||||
if (d == b)
|
||||
{
|
||||
// Convert carry to borrow
|
||||
CMC();
|
||||
SBB(32, gpr.R(d), gpr.R(a));
|
||||
invertedCarry = true;
|
||||
}
|
||||
else if (d == a)
|
||||
{
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
FinalizeCarryOverflow(inst.OE, invertedCarry);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::subfmex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, d = inst.RD;
|
||||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::subfzex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::subfx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
@ -1329,96 +1268,93 @@ void Jit64::addx(UGeckoInstruction inst)
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::addex(UGeckoInstruction inst)
|
||||
void Jit64::arithXex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
bool regsource = !(inst.SUBOP10 & 64); // addex or subfex
|
||||
bool mex = !!(inst.SUBOP10 & 32); // addmex/subfmex or addzex/subfzex
|
||||
bool add = !!(inst.SUBOP10 & 2); // add or sub
|
||||
int a = inst.RA;
|
||||
int b = regsource ? inst.RB : a;
|
||||
int d = inst.RD;
|
||||
bool same_input_sub = !add && regsource && a == b;
|
||||
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, (d == a) || (d == b));
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
if ((d == a) || (d == b))
|
||||
gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
|
||||
if (!js.carryFlagSet)
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
|
||||
bool invertedCarry = false;
|
||||
// Special case: subfe A, B, B is a common compiler idiom
|
||||
if (same_input_sub)
|
||||
{
|
||||
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
|
||||
// Convert carry to borrow
|
||||
if (!js.carryFlagInverted)
|
||||
CMC();
|
||||
SBB(32, gpr.R(d), gpr.R(d));
|
||||
invertedCarry = true;
|
||||
}
|
||||
else if (!add && regsource && d == b)
|
||||
{
|
||||
if (!js.carryFlagInverted)
|
||||
CMC();
|
||||
if (d != b)
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SBB(32, gpr.R(d), gpr.R(a));
|
||||
invertedCarry = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
|
||||
if (js.carryFlagInverted)
|
||||
CMC();
|
||||
if (d != a && d != b)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
if (!add)
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), source);
|
||||
}
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(inst.OE, invertedCarry);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::addcx(UGeckoInstruction inst)
|
||||
void Jit64::arithcx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
bool add = !!(inst.SUBOP10 & 2); // add or sub
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, d == a || d == b, true);
|
||||
|
||||
if ((d == a) || (d == b))
|
||||
if (d == a && d != b)
|
||||
{
|
||||
int operand = ((d == a) ? b : a);
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, true);
|
||||
JitClearCAOV(inst.OE);
|
||||
ADD(32, gpr.R(d), gpr.R(operand));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
if (add)
|
||||
{
|
||||
ADD(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
// special case, because sub isn't reversible
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SUB(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, false);
|
||||
JitClearCAOV(inst.OE);
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADD(32, gpr.R(d), gpr.R(b));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
if (d != b)
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
if (add)
|
||||
ADD(32, gpr.R(d), gpr.R(a));
|
||||
else
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::addmex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::addzex(UGeckoInstruction inst)
|
||||
{
|
||||
// USES_XER
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITIntegerOff);
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(inst.OE, !add);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
@ -1811,16 +1747,22 @@ void Jit64::srawx(UGeckoInstruction inst)
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.Lock(a, s, b);
|
||||
gpr.BindToRegister(a, (a == s || a == b), true);
|
||||
JitClearCAOV(false);
|
||||
MOV(32, R(ECX), gpr.R(b));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
SHL(64, gpr.R(a), Imm8(32));
|
||||
SAR(64, gpr.R(a), R(ECX));
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(RSCRATCH));
|
||||
JitSetCAIf(CC_NZ);
|
||||
if (js.op->wantsCA)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
}
|
||||
FinalizeCarry(CC_NZ);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
if (inst.Rc)
|
||||
@ -1838,41 +1780,50 @@ void Jit64::srawix(UGeckoInstruction inst)
|
||||
{
|
||||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH));
|
||||
// some optimized common cases that can be done in slightly fewer ops
|
||||
if (amount == 31)
|
||||
if (!js.op->wantsCA)
|
||||
{
|
||||
JitSetCA();
|
||||
SAR(32, gpr.R(a), Imm8(31));
|
||||
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
|
||||
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
||||
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
|
||||
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
|
||||
}
|
||||
else if (amount == 1)
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
||||
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
||||
SAR(32, gpr.R(a), Imm8(1));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||
JitSetCAIf(CC_NZ);
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH));
|
||||
// some optimized common cases that can be done in slightly fewer ops
|
||||
if (amount == 31)
|
||||
{
|
||||
JitSetCA();
|
||||
SAR(32, gpr.R(a), Imm8(31));
|
||||
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
|
||||
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
||||
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
|
||||
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
|
||||
}
|
||||
else if (amount == 1)
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
||||
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
||||
SAR(32, gpr.R(a), Imm8(1));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||
FinalizeCarry(CC_NZ);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.Lock(a, s);
|
||||
JitClearCAOV(false);
|
||||
FinalizeCarry(false);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
|
||||
if (a != s)
|
||||
|
@ -1106,7 +1106,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||
Jit->JitSetCA();
|
||||
FixupBranch cont = Jit->J();
|
||||
Jit->SetJumpTarget(nocarry);
|
||||
Jit->JitClearCAOV(false);
|
||||
Jit->JitClearCAOV(true, false);
|
||||
Jit->SetJumpTarget(cont);
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -81,13 +81,16 @@ protected:
|
||||
bool isLastInstruction;
|
||||
bool memcheck;
|
||||
bool skipnext;
|
||||
bool carryFlagSet;
|
||||
bool carryFlagInverted;
|
||||
|
||||
int fifoBytesThisBlock;
|
||||
|
||||
PPCAnalyst::BlockStats st;
|
||||
PPCAnalyst::BlockRegStats gpa;
|
||||
PPCAnalyst::BlockRegStats fpa;
|
||||
PPCAnalyst::CodeOp *op;
|
||||
PPCAnalyst::CodeOp* op;
|
||||
PPCAnalyst::CodeOp* next_op;
|
||||
u8* rewriteStart;
|
||||
|
||||
JitBlock *curBlock;
|
||||
|
@ -845,13 +845,14 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
||||
SETcc(conditionCode, R(RSCRATCH));
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
|
||||
}
|
||||
|
||||
void EmuCodeBlock::JitClearCAOV(bool oe)
|
||||
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
||||
{
|
||||
if (oe)
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
|
||||
else
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
||||
if (mask == 0xFFFFFFFF)
|
||||
return;
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
void JitGetAndClearCAOV(bool oe);
|
||||
void JitSetCA();
|
||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||
void JitClearCAOV(bool oe);
|
||||
void JitClearCAOV(bool ca, bool oe);
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
|
@ -213,14 +213,17 @@ static void AnalyzeFunction2(Symbol *func)
|
||||
func->flags = flags;
|
||||
}
|
||||
|
||||
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
|
||||
static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
{
|
||||
const GekkoOPInfo *a_info = a.opinfo;
|
||||
const GekkoOPInfo *b_info = b.opinfo;
|
||||
int a_flags = a_info->flags;
|
||||
int b_flags = b_info->flags;
|
||||
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL))
|
||||
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
|
||||
return false;
|
||||
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
|
||||
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
|
||||
return false;
|
||||
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
|
||||
return false;
|
||||
|
||||
switch (b.inst.OPCD)
|
||||
@ -250,20 +253,16 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
{
|
||||
int regInA = a.regsIn[j];
|
||||
int regInB = b.regsIn[j];
|
||||
if (regInA >= 0 &&
|
||||
(b.regsOut[0] == regInA ||
|
||||
b.regsOut[1] == regInA))
|
||||
{
|
||||
// reg collision! don't swap
|
||||
// register collision: b outputs to one of a's inputs
|
||||
if (regInA >= 0 && (b.regsOut[0] == regInA || b.regsOut[1] == regInA))
|
||||
return false;
|
||||
}
|
||||
if (regInB >= 0 &&
|
||||
(a.regsOut[0] == regInB ||
|
||||
a.regsOut[1] == regInB))
|
||||
{
|
||||
// reg collision! don't swap
|
||||
// register collision: a outputs to one of b's inputs
|
||||
if (regInB >= 0 && (a.regsOut[0] == regInB || a.regsOut[1] == regInB))
|
||||
return false;
|
||||
}
|
||||
// register collision: b outputs to one of a's outputs (overwriting it)
|
||||
for (int k = 0; k < 2; k++)
|
||||
if (b.regsOut[k] >= 0 && (b.regsOut[k] == a.regsOut[0] || b.regsOut[k] == a.regsOut[1]))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -403,34 +402,84 @@ void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db)
|
||||
leafSize, niceSize, unniceSize);
|
||||
}
|
||||
|
||||
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
|
||||
static bool isCmp(const CodeOp& a)
|
||||
{
|
||||
// Instruction Reordering Pass
|
||||
// Bubble down compares towards branches, so that they can be merged.
|
||||
// -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch.
|
||||
for (u32 i = 0; i < (instructions - 2); ++i)
|
||||
return (a.inst.OPCD == 10 || a.inst.OPCD == 11) || (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));
|
||||
}
|
||||
|
||||
// Returns true when the op's primary opcode field is 21 and its Rc bit is set.
// Presumably opcode 21 is rlwinm (i.e. this matches "rlwinm."), going by the
// function name -- confirm against the primary opcode table.
static bool isRlwinm_rc(const CodeOp& a)
|
||||
{
|
||||
return a.inst.OPCD == 21 && a.inst.Rc;
|
||||
}
|
||||
|
||||
// Returns true when the op's opinfo marks it as an integer-type instruction
// that sets the carry flag (FL_SET_CA) and does not carry the FL_SET_OE flag.
static bool isCarryOp(const CodeOp& a)
|
||||
{
|
||||
return (a.opinfo->flags & FL_SET_CA) && !(a.opinfo->flags & FL_SET_OE) && a.opinfo->type == OPTYPE_INTEGER;
|
||||
}
|
||||
|
||||
void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type)
|
||||
{
|
||||
// Bubbling an instruction sometimes reveals another opportunity to bubble an instruction, so do
|
||||
// multiple passes.
|
||||
while (true)
|
||||
{
|
||||
CodeOp &a = code[i];
|
||||
CodeOp &b = code[i + 1];
|
||||
// All integer compares can be reordered.
|
||||
if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) ||
|
||||
(a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32)))
|
||||
// Instruction Reordering Pass
|
||||
// Carry pass: bubble carry-using instructions as close to each other as possible, so we can avoid
|
||||
// storing the carry flag.
|
||||
// Compare pass: bubble compare instructions next to branches, so they can be merged.
|
||||
bool swapped = false;
|
||||
int increment = reverse ? -1 : 1;
|
||||
int start = reverse ? instructions - 1 : 0;
|
||||
int end = reverse ? 0 : instructions - 1;
|
||||
for (int i = start; i != end; i += increment)
|
||||
{
|
||||
// Got a compare instruction.
|
||||
if (CanSwapAdjacentOps(a, b))
|
||||
CodeOp &a = code[i];
|
||||
CodeOp &b = code[i + increment];
|
||||
// Reorder integer compares, rlwinm., and carry-affecting ops
|
||||
// (if we add more merged branch instructions, add them here!)
|
||||
if ((type == REORDER_CARRY && isCarryOp(a)) || (type == REORDER_CMP && (isCmp(a) || isRlwinm_rc(a))))
|
||||
{
|
||||
// Alright, let's bubble it down!
|
||||
std::swap(a, b);
|
||||
// once we're next to a carry instruction, don't move away!
|
||||
if (type == REORDER_CARRY && i != start)
|
||||
{
|
||||
// if we read the CA flag, and the previous instruction sets it, don't move away.
|
||||
if (!reverse && (a.opinfo->flags & FL_READ_CA) && (code[i - increment].opinfo->flags & FL_SET_CA))
|
||||
continue;
|
||||
// if we set the CA flag, and the next instruction reads it, don't move away.
|
||||
if (reverse && (a.opinfo->flags & FL_SET_CA) && (code[i - increment].opinfo->flags & FL_READ_CA))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (CanSwapAdjacentOps(a, b))
|
||||
{
|
||||
// Alright, let's bubble it!
|
||||
std::swap(a, b);
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!swapped)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the reordering passes that are enabled in the analyzer options.
//   instructions - instruction count, forwarded unchanged to ReorderInstructionsCore
//   code         - the CodeOp array, forwarded unchanged to ReorderInstructionsCore
// OPTION_CARRY_MERGE: a reverse carry pass followed by a forward carry pass.
// OPTION_BRANCH_MERGE: a single forward compare pass, run after any carry passes.
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
|
||||
{
|
||||
// For carry, bubble instructions *towards* each other; one direction often isn't enough
|
||||
// to get pairs like addc/adde next to each other.
|
||||
if (HasOption(OPTION_CARRY_MERGE))
|
||||
{
|
||||
ReorderInstructionsCore(instructions, code, true, REORDER_CARRY);
|
||||
ReorderInstructionsCore(instructions, code, false, REORDER_CARRY);
|
||||
}
|
||||
if (HasOption(OPTION_BRANCH_MERGE))
|
||||
ReorderInstructionsCore(instructions, code, false, REORDER_CMP);
|
||||
}
|
||||
|
||||
void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index)
|
||||
{
|
||||
code->wantsCR0 = false;
|
||||
code->wantsCR1 = false;
|
||||
code->wantsPS1 = false;
|
||||
|
||||
if (opinfo->flags & FL_USE_FPU)
|
||||
block->m_fpa->any = true;
|
||||
@ -458,6 +507,24 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||
|
||||
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
|
||||
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
|
||||
|
||||
// We're going to try to avoid storing carry in XER if we can avoid it -- keep it in the x86 carry flag!
|
||||
// If the instruction reads CA but doesn't write it, we still need to store CA in XER; we can't
|
||||
// leave it in flags.
|
||||
if (HasOption(OPTION_CARRY_MERGE))
|
||||
code->wantsCAInFlags = code->wantsCA && code->outputCA && opinfo->type == OPTYPE_INTEGER;
|
||||
else
|
||||
code->wantsCAInFlags = false;
|
||||
|
||||
// mfspr/mtspr can affect/use XER, so be super careful here
|
||||
// we need to note specifically that mfspr needs CA in XER, not in the x86 carry flag
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
|
||||
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
|
||||
int numOut = 0;
|
||||
int numIn = 0;
|
||||
if (opinfo->flags & FL_OUT_A)
|
||||
@ -715,26 +782,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||
block->m_broken = true;
|
||||
}
|
||||
|
||||
// Scan for CR0 dependency
|
||||
// assume next block wants flags to be safe
|
||||
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
|
||||
// wants flags, to be safe.
|
||||
bool wantsCR0 = true;
|
||||
bool wantsCR1 = true;
|
||||
bool wantsPS1 = true;
|
||||
bool wantsFPRF = true;
|
||||
bool wantsCA = true;
|
||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||
{
|
||||
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
||||
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
||||
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
||||
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
||||
code[i].wantsCR0 = wantsCR0;
|
||||
code[i].wantsCR1 = wantsCR1;
|
||||
code[i].wantsPS1 = wantsPS1;
|
||||
code[i].wantsFPRF = wantsFPRF;
|
||||
wantsCR0 &= !code[i].outputCR0;
|
||||
wantsCR1 &= !code[i].outputCR1;
|
||||
wantsPS1 &= !code[i].outputPS1;
|
||||
wantsFPRF &= !code[i].outputFPRF;
|
||||
bool opWantsCR0 = code[i].wantsCR0;
|
||||
bool opWantsCR1 = code[i].wantsCR1;
|
||||
bool opWantsFPRF = code[i].wantsFPRF;
|
||||
bool opWantsCA = code[i].wantsCA;
|
||||
code[i].wantsCR0 = wantsCR0 || code[i].canEndBlock;
|
||||
code[i].wantsCR1 = wantsCR1 || code[i].canEndBlock;
|
||||
code[i].wantsFPRF = wantsFPRF || code[i].canEndBlock;
|
||||
code[i].wantsCA = wantsCA || code[i].canEndBlock;
|
||||
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
|
||||
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
|
||||
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
|
||||
wantsCA |= opWantsCA || code[i].canEndBlock;
|
||||
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
|
||||
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
|
||||
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
|
||||
wantsCA &= !code[i].outputCA || opWantsCA;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
@ -33,12 +33,13 @@ struct CodeOp //16B
|
||||
bool isBranchTarget;
|
||||
bool wantsCR0;
|
||||
bool wantsCR1;
|
||||
bool wantsPS1;
|
||||
bool wantsFPRF;
|
||||
bool wantsCA;
|
||||
bool wantsCAInFlags;
|
||||
bool outputCR0;
|
||||
bool outputCR1;
|
||||
bool outputPS1;
|
||||
bool outputFPRF;
|
||||
bool outputCA;
|
||||
bool canEndBlock;
|
||||
bool skip; // followed BL-s for example
|
||||
};
|
||||
@ -143,6 +144,13 @@ class PPCAnalyzer
|
||||
{
|
||||
private:
|
||||
|
||||
// Selects which class of instruction ReorderInstructionsCore tries to bubble.
enum ReorderType
|
||||
{
|
||||
REORDER_CARRY, // ops accepted by isCarryOp()
|
||||
REORDER_CMP // ops accepted by isCmp() or isRlwinm_rc()
|
||||
};
|
||||
|
||||
void ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type);
|
||||
void ReorderInstructions(u32 instructions, CodeOp *code);
|
||||
void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index);
|
||||
|
||||
@ -175,6 +183,14 @@ public:
|
||||
// Requires JIT support to work.
|
||||
// XXX: NOT COMPLETE
|
||||
OPTION_FORWARD_JUMP = (1 << 3),
|
||||
|
||||
// Reorder compare/Rc instructions next to their associated branches and
|
||||
// merge in the JIT (for common cases, anyway).
|
||||
OPTION_BRANCH_MERGE = (1 << 4),
|
||||
|
||||
// Reorder carry instructions next to each other and pass
|
||||
// carry flags in the x86 flags between them, instead of in XER.
|
||||
OPTION_CARRY_MERGE = (1 << 5),
|
||||
};
|
||||
|
||||
|
||||
|
@ -38,6 +38,7 @@ enum
|
||||
FL_LOADSTORE = (1<<19),
|
||||
FL_SET_FPRF = (1<<20),
|
||||
FL_READ_FPRF = (1<<21),
|
||||
FL_SET_OE = (1<<22),
|
||||
};
|
||||
|
||||
enum
|
||||
|
Loading…
x
Reference in New Issue
Block a user