A bunch more WIP JIT work.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1779 8ced0084-cf51-0410-be5f-012b33b47a6e
commit 6ad44fd470
parent 3a3fd721d1
@@ -245,6 +245,13 @@ InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) {
        GRegCache[extra] = EmitZeroOp(LoadGReg, extra);
        return GRegCache[extra];
    }
    if (Opcode == LoadFReg) {
        // Reg load folding: if we already loaded the value,
        // don't load it again
        if (!FRegCache[extra])
            FRegCache[extra] = EmitZeroOp(LoadFReg, extra);
        return FRegCache[extra];
    }
    if (Opcode == LoadCarry) {
        if (!CarryCache)
            CarryCache = EmitZeroOp(LoadGReg, extra);
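The LoadGReg/LoadFReg handling above is a per-register memo: the first load of a guest register in a block emits one IR node, later loads reuse it, and a store updates the cached value. A standalone sketch of the idea (not part of the change; the names are simplified stand-ins for the real IRBuilder interface):

// Sketch: fold repeated guest-register loads onto a single IR node per block.
typedef unsigned* InstLoc;

struct GRegLoadCache {
    InstLoc cache[32];

    void Reset() {
        for (unsigned i = 0; i < 32; i++)
            cache[i] = 0;
    }
    // emitLoad stands in for EmitZeroOp(LoadGReg, reg).
    InstLoc FoldLoad(unsigned reg, InstLoc (*emitLoad)(unsigned)) {
        if (!cache[reg])               // first load in this block: emit it once
            cache[reg] = emitLoad(reg);
        return cache[reg];             // later loads reuse the same node
    }
    void OnStore(unsigned reg, InstLoc value) {
        cache[reg] = value;            // a store makes the stored value the cached load
    }
};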
@@ -270,6 +277,14 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
        GRegCacheStore[extra] = EmitUOp(StoreGReg, Op1, extra);
        return GRegCacheStore[extra];
    }
    if (Opcode == StoreFReg) {
        FRegCache[extra] = Op1;
        if (FRegCacheStore[extra]) {
            *FRegCacheStore[extra] = 0;
        }
        FRegCacheStore[extra] = EmitUOp(StoreFReg, Op1, extra);
        return FRegCacheStore[extra];
    }
    if (Opcode == StoreCarry) {
        CarryCache = Op1;
        if (CarryCacheStore) {
@@ -286,6 +301,10 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
        CRCacheStore[extra] = EmitUOp(StoreCR, Op1, extra);
        return CRCacheStore[extra];
    }
    if (Opcode == CompactMRegToPacked) {
        if (getOpcode(*Op1) == ExpandPackedToMReg)
            return getOp1(Op1);
    }

    return EmitUOp(Opcode, Op1, extra);
}
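Two patterns are at work in FoldUOp above: a newer store to a register overwrites the previously cached store instruction with 0, turning it into a no-op (a cheap dead-store elimination), and CompactMRegToPacked of a value just produced by ExpandPackedToMReg folds back to the original packed value because the two conversions cancel. A standalone sketch of the latter inverse-operation folding (illustrative names, not the real IR types):

// Sketch: fold Compact(Expand(x)) -> x while building the IR.
enum SketchOpcode { ExpandPackedToMReg_Op, CompactMRegToPacked_Op };

struct SketchNode {
    SketchOpcode op;
    SketchNode* op1;
};

SketchNode* FoldCompact(SketchNode* op1, SketchNode* (*emitCompact)(SketchNode*)) {
    if (op1->op == ExpandPackedToMReg_Op)
        return op1->op1;        // the expand/compact pair cancels; reuse the packed value
    return emitCompact(op1);    // otherwise emit the real conversion
}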
@@ -441,6 +460,8 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
    for (unsigned i = 0; i < 32; i++) {
        GRegCache[i] = 0;
        GRegCacheStore[i] = 0;
        FRegCache[i] = 0;
        FRegCacheStore[i] = 0;
    }
    CarryCache = 0;
    CarryCacheStore = 0;
@@ -610,9 +631,10 @@ static X64Reg fregFindFreeReg(RegInfo& RI) {
    for (unsigned i = 0; i < FRegAllocSize; i++)
        if (RI.fregs[FRegAllocOrder[i]] == 0)
            return FRegAllocOrder[i];
    // XMM0/1 are scratch, so we don't allocate them
    fregSpill(RI, XMM7);
    return XMM7;
    static unsigned nextReg = 0;
    X64Reg reg = FRegAllocOrder[nextReg++ % FRegAllocSize];
    fregSpill(RI, reg);
    return reg;
}

static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
@@ -1016,10 +1038,12 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
    case SExt16:
    case BSwap32:
    case BSwap16:
    case Cntlzw:
    case DupSingleToMReg:
    case DoubleToSingle:
    case ExpandPackedToMReg:
    case CompactMRegToPacked:
    case FPNeg:
        if (thisUsed)
            regMarkUse(RI, I, getOp1(I), 1);
        break;
@@ -1062,6 +1086,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
    case ICmpSgt:
    case FSMul:
    case FSAdd:
    case FPAdd:
    case FPMul:
    case FPSub:
    case InsertDoubleInMReg:
        if (thisUsed) {
            regMarkUse(RI, I, getOp1(I), 1);
@@ -1237,6 +1264,17 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
        regNormalRegClear(RI, I);
        break;
    }
    case Cntlzw: {
        if (!thisUsed) break;
        X64Reg reg = regUReg(RI, I);
        Jit->MOV(32, R(ECX), Imm32(63));
        Jit->BSR(32, reg, regLocForInst(RI, getOp1(I)));
        Jit->CMOVcc(32, reg, R(ECX), CC_Z);
        Jit->XOR(32, R(reg), Imm8(31));
        RI.regs[reg] = I;
        regNormalRegClear(RI, I);
        break;
    }
    case And: {
        if (!thisUsed) break;
        regEmitBinInst(RI, I, &Jit64::AND);
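The Cntlzw case above uses the classic pre-LZCNT x86 idiom: BSR yields the index of the highest set bit and sets ZF on a zero input, CMOVZ substitutes 63, and XOR with 31 turns the bit index into a leading-zero count (31 - index for nonzero input; 63 ^ 31 = 32 for zero). A scalar reference model of that sequence, separate from the JIT code:

#include <cstdint>

// Mirrors the emitted MOV/BSR/CMOVZ/XOR sequence for cntlzw.
static uint32_t CountLeadingZeros32(uint32_t x) {
    uint32_t idx = 63;                    // MOV ECX, 63: value used when the input is zero
    for (int bit = 31; bit >= 0; bit--) { // BSR: find the highest set bit, if any
        if (x & (1u << bit)) {
            idx = (uint32_t)bit;
            break;
        }
    }
    return idx ^ 31;                      // 31 - idx for nonzero x, 32 when x == 0
}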
@@ -1447,6 +1485,17 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
        fregNormalRegClear(RI, I);
        break;
    }
    case FPNeg: {
        if (!thisUsed) break;
        X64Reg reg = fregFindFreeReg(RI);
        Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
        static const u32 GC_ALIGNED16(psSignBits[4]) =
            {0x80000000, 0x80000000};
        Jit->PXOR(reg, M((void*)&psSignBits));
        RI.fregs[reg] = I;
        fregNormalRegClear(RI, I);
        break;
    }
    case LoadFReg: {
        if (!thisUsed) break;
        X64Reg reg = fregFindFreeReg(RI);
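FPNeg above flips the IEEE-754 sign bits of the paired singles by XORing with an aligned 0x80000000 mask; PXOR only disturbs the lanes covered by the mask. The same trick on a single float, as a standalone reference (bit copy instead of SSE):

#include <cstdint>
#include <cstring>

// Negate a float by toggling its sign bit, matching the PXOR-with-mask idea.
static float NegateViaSignBit(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits)); // reinterpret the float's bit pattern
    bits ^= 0x80000000u;                  // flip the IEEE-754 sign bit
    std::memcpy(&f, &bits, sizeof(bits));
    return f;
}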
@@ -1488,6 +1537,33 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
        fregNormalRegClear(RI, I);
        break;
    }
    case FPAdd: {
        if (!thisUsed) break;
        X64Reg reg = fregFindFreeReg(RI);
        Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
        Jit->ADDPS(reg, fregLocForInst(RI, getOp2(I)));
        RI.fregs[reg] = I;
        fregNormalRegClear(RI, I);
        break;
    }
    case FPMul: {
        if (!thisUsed) break;
        X64Reg reg = fregFindFreeReg(RI);
        Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
        Jit->MULPS(reg, fregLocForInst(RI, getOp2(I)));
        RI.fregs[reg] = I;
        fregNormalRegClear(RI, I);
        break;
    }
    case FPSub: {
        if (!thisUsed) break;
        X64Reg reg = fregFindFreeReg(RI);
        Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
        Jit->SUBPS(reg, fregLocForInst(RI, getOp2(I)));
        RI.fregs[reg] = I;
        fregNormalRegClear(RI, I);
        break;
    }
    case CInt32:
    case CInt16: {
        if (!thisUsed) break;
@@ -1579,11 +1655,19 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
        if (!thisUsed) break;
        // FIXME: Optimize!
        InstLoc Op = I - 1 - (*I >> 8);
        X64Reg reg = regFindFreeReg(RI);
        Jit->MOV(32, R(reg), regLocForInst(RI, Op));
        RI.regs[reg] = I;
        if (RI.IInfo[I - RI.FirstI] & 4)
            regClearInst(RI, Op);
        if (isFResult(*Op)) {
            X64Reg reg = fregFindFreeReg(RI);
            Jit->MOVAPD(reg, fregLocForInst(RI, Op));
            RI.fregs[reg] = I;
            if (RI.IInfo[I - RI.FirstI] & 4)
                fregClearInst(RI, Op);
        } else {
            X64Reg reg = regFindFreeReg(RI);
            Jit->MOV(32, R(reg), regLocForInst(RI, Op));
            RI.regs[reg] = I;
            if (RI.IInfo[I - RI.FirstI] & 4)
                regClearInst(RI, Op);
        }
        break;
    }
    case Nop: break;
@@ -41,6 +41,7 @@ namespace IREmitter {
    SExt16,
    BSwap32,
    BSwap16,
    Cntlzw, // Count leading zeros
    Load8, // These loads zext
    Load16,
    Load32,
@@ -143,19 +144,26 @@ namespace IREmitter {
    ForceToDouble,
    ForceToMReg,
#endif
    FResult_Start,
    LoadSingle,
    LoadDouble,
    LoadPaired, // This handles quantizers itself
    StorePaired,
    DoubleToSingle,
    DupSingleToMReg,
    DupSingleToPacked,
    InsertDoubleInMReg,
    ExpandPackedToMReg,
    CompactMRegToPacked,
    LoadFReg,
    StoreFReg,
    FSMul,
    FSAdd,
    FPAdd,
    FPMul,
    FPSub,
    FPNeg,
    FResult_End,
    StorePaired,
    StoreFReg,

    // "Trinary" operators
    // FIXME: Need to change representation!
@@ -189,37 +197,9 @@ namespace IREmitter {
        return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32;
    }

    unsigned inline isUnary(Inst i) {
        return getOpcode(i) >= SExt8 && getOpcode(i) <= BSwap16;
    }

    unsigned inline isBinary(Inst i) {
        return getOpcode(i) >= Add && getOpcode(i) <= ICmpCRUnsigned;
    }

    unsigned inline isMemLoad(Inst i) {
        return getOpcode(i) >= Load8 && getOpcode(i) <= Load32;
    }

    unsigned inline isMemStore(Inst i) {
        return getOpcode(i) >= Store8 && getOpcode(i) <= Store32;
    }

    unsigned inline isRegLoad(Inst i) {
        return getOpcode(i) >= LoadGReg && getOpcode(i) <= LoadCR;
    }

    unsigned inline isRegStore(Inst i) {
        return getOpcode(i) >= LoadGReg && getOpcode(i) <= LoadCR;
    }

    unsigned inline isBranch(Inst i) {
        return getOpcode(i) >= BranchUncond &&
            getOpcode(i) <= BranchCond;
    }

    unsigned inline isInterpreterFallback(Inst i) {
        return getOpcode(i) == InterpreterFallback;
    unsigned inline isFResult(Inst i) {
        return getOpcode(i) > FResult_Start &&
            getOpcode(i) < FResult_End;
    }

    InstLoc inline getOp1(InstLoc i) {
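isFResult above works because FResult_Start and FResult_End are sentinel enumerators bracketing every opcode that produces a floating-point result, so classification is a single range check. A small illustrative sketch of that enum-sentinel idiom (the opcode list here is made up):

// Sketch: bracket a group of enumerators with sentinels so membership
// tests are one pair of comparisons instead of a switch.
enum SketchOpcode2 {
    Add_Op2, Sub_Op2,
    FResult_Start_Op,    // sentinel: everything strictly inside produces an FP result
    LoadSingle_Op, FPAdd_Op, FPMul_Op, FPNeg_Op,
    FResult_End_Op,      // sentinel
    BranchUncond_Op
};

inline bool IsFResultSketch(SketchOpcode2 op) {
    return op > FResult_Start_Op && op < FResult_End_Op;
}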
@@ -360,6 +340,9 @@ namespace IREmitter {
    InstLoc EmitSExt8(InstLoc op1) {
        return FoldUOp(SExt8, op1);
    }
    InstLoc EmitCntlzw(InstLoc op1) {
        return FoldUOp(Cntlzw, op1);
    }
    InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) {
        return FoldBiOp(ICmpCRSigned, op1, op2);
    }
@@ -405,6 +388,9 @@ namespace IREmitter {
    InstLoc EmitDupSingleToMReg(InstLoc val) {
        return FoldUOp(DupSingleToMReg, val);
    }
    InstLoc EmitDupSingleToPacked(InstLoc val) {
        return FoldUOp(DupSingleToPacked, val);
    }
    InstLoc EmitInsertDoubleInMReg(InstLoc val, InstLoc reg) {
        return FoldBiOp(InsertDoubleInMReg, val, reg);
    }
@@ -420,6 +406,18 @@ namespace IREmitter {
    InstLoc EmitFSAdd(InstLoc op1, InstLoc op2) {
        return FoldBiOp(FSAdd, op1, op2);
    }
    InstLoc EmitFPAdd(InstLoc op1, InstLoc op2) {
        return FoldBiOp(FPAdd, op1, op2);
    }
    InstLoc EmitFPMul(InstLoc op1, InstLoc op2) {
        return FoldBiOp(FPMul, op1, op2);
    }
    InstLoc EmitFPSub(InstLoc op1, InstLoc op2) {
        return FoldBiOp(FPSub, op1, op2);
    }
    InstLoc EmitFPNeg(InstLoc op1) {
        return FoldUOp(FPNeg, op1);
    }
    InstLoc EmitDoubleToSingle(InstLoc op1) {
        return FoldUOp(DoubleToSingle, op1);
    }
@@ -439,6 +437,8 @@ namespace IREmitter {
    for (unsigned i = 0; i < 32; i++) {
        GRegCache[i] = 0;
        GRegCacheStore[i] = 0;
        FRegCache[i] = 0;
        FRegCacheStore[i] = 0;
    }
    CarryCache = 0;
    CarryCacheStore = 0;
@@ -458,6 +458,8 @@ namespace IREmitter {
    InstLoc curReadPtr;
    InstLoc GRegCache[32];
    InstLoc GRegCacheStore[32];
    InstLoc FRegCache[32];
    InstLoc FRegCacheStore[32];
    InstLoc CarryCache;
    InstLoc CarryCacheStore;
    InstLoc CRCache[8];
@@ -179,39 +179,32 @@

void Jit64::subfic(UGeckoInstruction inst)
{
    if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
        {Default(inst); return;} // turn off from debugger

    INSTRUCTION_START;
    int a = inst.RA, d = inst.RD;
    gpr.FlushLockX(ECX);
    gpr.Lock(a, d);
    gpr.LoadToX64(d, a == d, true);
    int imm = inst.SIMM_16;
    MOV(32, R(EAX), gpr.R(a));
    NOT(32, R(EAX));
    ADD(32, R(EAX), Imm32(imm + 1));
    MOV(32, gpr.R(d), R(EAX));
    //GenerateCarry(ECX);
    gpr.UnlockAll();
    gpr.UnlockAllX();
    // This instruction has no RC flag
    Default(inst);
    return;
    // FIXME: Disabling until I figure out subfcx
    IREmitter::InstLoc val, test, c;
    c = ibuild.EmitIntConst(inst.SIMM_16);
    val = ibuild.EmitSub(c, ibuild.EmitLoadGReg(inst.RA));
    ibuild.EmitStoreGReg(val, inst.RD);
    test = ibuild.EmitICmpUgt(val, c);
    ibuild.EmitStoreCarry(test);
}

void Jit64::subfcx(UGeckoInstruction inst)
{
    INSTRUCTION_START;
    Default(inst);
    return;
    /*
    u32 a = m_GPR[_inst.RA];
    u32 b = m_GPR[_inst.RB];
    m_GPR[_inst.RD] = b - a;
    SetCarry(a == 0 || Helper_Carry(b, 0-a));

    if (_inst.OE) PanicAlert("OE: subfcx");
    if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
    */
    // FIXME: Figure out what the heck is going wrong here...
    if (inst.OE) PanicAlert("OE: subfcx");
    IREmitter::InstLoc val, test, lhs, rhs;
    lhs = ibuild.EmitLoadGReg(inst.RB);
    rhs = ibuild.EmitLoadGReg(inst.RA);
    val = ibuild.EmitSub(lhs, rhs);
    ibuild.EmitStoreGReg(val, inst.RD);
    test = ibuild.EmitICmpUgt(rhs, lhs);
    ibuild.EmitStoreCarry(test);
    if (inst.Rc)
        ComputeRC(ibuild, val);
}

void Jit64::subfex(UGeckoInstruction inst)
@@ -394,33 +387,14 @@

void Jit64::rlwnmx(UGeckoInstruction inst)
{
    if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
        {Default(inst); return;} // turn off from debugger

    INSTRUCTION_START;
    int a = inst.RA, b = inst.RB, s = inst.RS;
    if (gpr.R(a).IsImm())
    {
        Default(inst);
        return;
    }

    u32 mask = Helper_Mask(inst.MB, inst.ME);
    gpr.FlushLockX(ECX);
    gpr.Lock(a, b, s);
    MOV(32, R(EAX), gpr.R(s));
    MOV(32, R(ECX), gpr.R(b));
    AND(32, R(ECX), Imm32(0x1f));
    ROL(32, R(EAX), R(ECX));
    AND(32, R(EAX), Imm32(mask));
    MOV(32, gpr.R(a), R(EAX));
    gpr.UnlockAll();
    gpr.UnlockAllX();
    INSTRUCTION_START
    unsigned mask = Helper_Mask(inst.MB, inst.ME);
    IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
    val = ibuild.EmitRol(val, ibuild.EmitLoadGReg(inst.RB));
    val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
    ibuild.EmitStoreGReg(val, inst.RA);
    if (inst.Rc)
    {
        MOV(32, R(EAX), gpr.R(a));
        CALL((u8*)asm_routines.computeRc);
    }
    ComputeRC(ibuild, val);
}

void Jit64::negx(UGeckoInstruction inst)
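The IR version of rlwnmx above computes rotl(rS, rB & 31) masked by MASK(MB, ME), per the PowerPC definition; only the low five bits of rB contribute to the rotate amount. A scalar reference sketch, with a simplified mask helper standing in for Helper_Mask:

#include <cstdint>

// Rotate left by 0..31 bits (the count is masked, so a count of 0 stays well-defined).
static uint32_t RotL32(uint32_t x, uint32_t n) {
    n &= 31;
    return n ? (x << n) | (x >> (32 - n)) : x;
}

// Simplified stand-in for Helper_Mask: PPC bit numbering, bit 0 = MSB.
// Produces a mask with bits MB..ME set, wrapping around when MB > ME.
static uint32_t PpcMask(unsigned mb, unsigned me) {
    uint32_t begin = 0xFFFFFFFFu >> mb;
    uint32_t end = 0x7FFFFFFFu >> me;
    uint32_t mask = begin ^ end;
    return (mb > me) ? ~mask : mask;
}

// rlwnm: rA = rotl(rS, rB & 31) & MASK(MB, ME)
static uint32_t Rlwnm(uint32_t rs, uint32_t rb, unsigned mb, unsigned me) {
    return RotL32(rs, rb) & PpcMask(mb, me);
}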
@@ -509,30 +483,9 @@
// count leading zeroes
void Jit64::cntlzwx(UGeckoInstruction inst)
{
    if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
        {Default(inst); return;} // turn off from debugger

    INSTRUCTION_START;
    int a = inst.RA;
    int s = inst.RS;
    if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a)
    {
        Default(inst);
        return;
    }
    gpr.Lock(a,s);
    gpr.LoadToX64(a,false);
    BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
    FixupBranch gotone = J_CC(CC_NZ);
    MOV(32, gpr.R(a), Imm32(63));
    SetJumpTarget(gotone);
    XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
    gpr.UnlockAll();

    IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
    val = ibuild.EmitCntlzw(val);
    ibuild.EmitStoreGReg(val, inst.RA);
    if (inst.Rc)
    {
        MOV(32, R(EAX), gpr.R(a));
        CALL((u8*)asm_routines.computeRc);
        // TODO: Check PPC manual too
    }
    ComputeRC(ibuild, val);
}
@@ -88,8 +88,10 @@ void Jit64::lXz(UGeckoInstruction inst)
    IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
    if (inst.RA)
        addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
    if (inst.OPCD & 1)
        ibuild.EmitStoreGReg(addr, inst.RA);
    IREmitter::InstLoc val;
    switch (inst.OPCD)
    switch (inst.OPCD & -2)
    {
    case 32: val = ibuild.EmitLoad32(addr); break; //lwz
    case 40: val = ibuild.EmitLoad16(addr); break; //lhz
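In the lXz change above, switching on inst.OPCD & -2 clears the low opcode bit so a load and its update form share one case (lwz = 32 and lwzu = 33 both land in case 32), while the earlier inst.OPCD & 1 test decides whether the effective address is written back to rA. A small illustrative sketch of that pairing (not the JIT code itself):

#include <cstdint>

// Group lwz/lwzu, lbz/lbzu, lhz/lhzu by masking off the low opcode bit;
// the low bit by itself selects the "update rA" behaviour.
static const char* ClassifyLoad(uint32_t opcd, bool* updatesRA) {
    *updatesRA = (opcd & 1) != 0;  // lwzu=33, lbzu=35, lhzu=41 are the odd opcodes
    switch (opcd & ~1u) {          // same effect as (opcd & -2)
    case 32: return "lwz/lwzu";    // 32-bit zero-extending load
    case 34: return "lbz/lbzu";    // 8-bit zero-extending load
    case 40: return "lhz/lhzu";    // 16-bit zero-extending load
    default: return "other";
    }
}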
@@ -159,70 +159,31 @@
    add a,b,a
*/

//There's still a little bit more optimization that can be squeezed out of this
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
{
    fpr.Lock(d, a, b);

    if (d == a)
    {
        fpr.LoadToX64(d, true);
        (this->*op)(fpr.RX(d), fpr.R(b));
    }
    else if (d == b && reversible)
    {
        fpr.LoadToX64(d, true);
        (this->*op)(fpr.RX(d), fpr.R(a));
    }
    else if (a != d && b != d)
    {
        //sources different from d, can use rather quick solution
        fpr.LoadToX64(d, false);
        MOVAPD(fpr.RX(d), fpr.R(a));
        (this->*op)(fpr.RX(d), fpr.R(b));
    }
    else if (b != d)
    {
        fpr.LoadToX64(d, false);
        MOVAPD(XMM0, fpr.R(b));
        MOVAPD(fpr.RX(d), fpr.R(a));
        (this->*op)(fpr.RX(d), Gen::R(XMM0));
    }
    else //Other combo, must use two temps :(
    {
        MOVAPD(XMM0, fpr.R(a));
        MOVAPD(XMM1, fpr.R(b));
        fpr.LoadToX64(d, false);
        (this->*op)(XMM0, Gen::R(XMM1));
        MOVAPD(fpr.RX(d), Gen::R(XMM0));
    }
    ForceSinglePrecisionP(fpr.RX(d));
    fpr.UnlockAll();
}

void Jit64::ps_arith(UGeckoInstruction inst)
{
    if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
        {Default(inst); return;} // turn off from debugger
    INSTRUCTION_START;
    if (inst.Rc) {
    {
    if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) {
        Default(inst); return;
    }
    IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), rhs;
    if (inst.SUBOP5 == 25)
        rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
    else
        rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
    val = ibuild.EmitCompactMRegToPacked(val);

    switch (inst.SUBOP5)
    {
    case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
    case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
    case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
    case 23://sel
        Default(inst);
    case 20:
        val = ibuild.EmitFPSub(val, rhs);
        break;
    case 24://res
        Default(inst);
    case 21:
        val = ibuild.EmitFPAdd(val, rhs);
        break;
    case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
    default:
        _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
    case 25:
        val = ibuild.EmitFPMul(val, rhs);
    }
    val = ibuild.EmitExpandPackedToMReg(val);
    ibuild.EmitStoreFReg(val, inst.FD);
}

void Jit64::ps_sum(UGeckoInstruction inst)
@@ -347,58 +308,37 @@
//TODO: add optimized cases
void Jit64::ps_maddXX(UGeckoInstruction inst)
{
    if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
        {Default(inst); return;} // turn off from debugger
    INSTRUCTION_START;
    if (inst.Rc) {
    if (inst.Rc || (inst.SUBOP5 != 28 && inst.SUBOP5 != 29 && inst.SUBOP5 != 30)) {
        Default(inst); return;
    }
    int a = inst.FA;
    int b = inst.FB;
    int c = inst.FC;
    int d = inst.FD;
    fpr.Lock(a,b,c,d);

    MOVAPD(XMM0, fpr.R(a));

    IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), op2, op3;
    val = ibuild.EmitCompactMRegToPacked(val);
    switch (inst.SUBOP5)
    {
    case 14: //madds0
        MOVDDUP(XMM1, fpr.R(c));
        MULPD(XMM0, R(XMM1));
        ADDPD(XMM0, fpr.R(b));
    case 28: {//msub
        op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
        val = ibuild.EmitFPMul(val, op2);
        op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
        val = ibuild.EmitFPSub(val, op3);
        break;
    case 15: //madds1
        MOVAPD(XMM1, fpr.R(c));
        SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
        MULPD(XMM0, R(XMM1));
        ADDPD(XMM0, fpr.R(b));
        break;
    case 28: //msub
        MULPD(XMM0, fpr.R(c));
        SUBPD(XMM0, fpr.R(b));
        break;
    case 29: //madd
        MULPD(XMM0, fpr.R(c));
        ADDPD(XMM0, fpr.R(b));
        break;
    case 30: //nmsub
        MULPD(XMM0, fpr.R(c));
        SUBPD(XMM0, fpr.R(b));
        XORPD(XMM0, M((void*)&psSignBits));
        break;
    case 31: //nmadd
        MULPD(XMM0, fpr.R(c));
        ADDPD(XMM0, fpr.R(b));
        XORPD(XMM0, M((void*)&psSignBits));
        break;
    default:
        _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
        //Default(inst);
        //fpr.UnlockAll();
        return;
    }
    fpr.LoadToX64(d, false);
    MOVAPD(fpr.RX(d), Gen::R(XMM0));
    ForceSinglePrecisionP(fpr.RX(d));
    fpr.UnlockAll();
    case 29: {//madd
        op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
        val = ibuild.EmitFPMul(val, op2);
        op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
        val = ibuild.EmitFPAdd(val, op3);
        break;
    }
    case 30: {//nmsub
        op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
        val = ibuild.EmitFPMul(val, op2);
        op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
        val = ibuild.EmitFPSub(val, op3);
        val = ibuild.EmitFPNeg(val);
        break;
    }
    }
    val = ibuild.EmitExpandPackedToMReg(val);
    ibuild.EmitStoreFReg(val, inst.FD);
}
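The rewritten ps_maddXX maps SUBOP5 28/29/30 onto the paired-single fused forms msub = a*c - b, madd = a*c + b, and nmsub = -(a*c - b), built here from separate FPMul/FPAdd/FPSub/FPNeg IR operations rather than a single fused multiply-add. A per-lane reference sketch:

// Per-lane reference for the SUBOP5 cases handled above; the JIT applies
// the same operation to both halves of a paired single.
static float PsMsub(float a, float c, float b)  { return a * c - b; }    // SUBOP5 == 28
static float PsMadd(float a, float c, float b)  { return a * c + b; }    // SUBOP5 == 29
static float PsNmsub(float a, float c, float b) { return -(a * c - b); } // SUBOP5 == 30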