diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp
index 7db7c6e954..a06431f8d1 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp
@@ -245,6 +245,13 @@ InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) {
 		GRegCache[extra] = EmitZeroOp(LoadGReg, extra);
 		return GRegCache[extra];
 	}
+	if (Opcode == LoadFReg) {
+		// Reg load folding: if we already loaded the value,
+		// don't load it again
+		if (!FRegCache[extra])
+			FRegCache[extra] = EmitZeroOp(LoadFReg, extra);
+		return FRegCache[extra];
+	}
 	if (Opcode == LoadCarry) {
 		if (!CarryCache)
 			CarryCache = EmitZeroOp(LoadGReg, extra);
@@ -270,6 +277,14 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
 		GRegCacheStore[extra] = EmitUOp(StoreGReg, Op1, extra);
 		return GRegCacheStore[extra];
 	}
+	if (Opcode == StoreFReg) {
+		FRegCache[extra] = Op1;
+		if (FRegCacheStore[extra]) {
+			*FRegCacheStore[extra] = 0;
+		}
+		FRegCacheStore[extra] = EmitUOp(StoreFReg, Op1, extra);
+		return FRegCacheStore[extra];
+	}
 	if (Opcode == StoreCarry) {
 		CarryCache = Op1;
 		if (CarryCacheStore) {
@@ -286,6 +301,10 @@ InstLoc IRBuilder::FoldUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
 		CRCacheStore[extra] = EmitUOp(StoreCR, Op1, extra);
 		return CRCacheStore[extra];
 	}
+	if (Opcode == CompactMRegToPacked) {
+		if (getOpcode(*Op1) == ExpandPackedToMReg)
+			return getOp1(Op1);
+	}
 	return EmitUOp(Opcode, Op1, extra);
 }

@@ -441,6 +460,8 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
 	for (unsigned i = 0; i < 32; i++) {
 		GRegCache[i] = 0;
 		GRegCacheStore[i] = 0;
+		FRegCache[i] = 0;
+		FRegCacheStore[i] = 0;
 	}
 	CarryCache = 0;
 	CarryCacheStore = 0;
@@ -610,9 +631,10 @@ static X64Reg fregFindFreeReg(RegInfo& RI) {
 	for (unsigned i = 0; i < FRegAllocSize; i++)
 		if (RI.fregs[FRegAllocOrder[i]] == 0)
 			return FRegAllocOrder[i];
-	// XMM0/1 are scratch, so we don't allocate it
-	fregSpill(RI, XMM7);
-	return XMM7;
+	static unsigned nextReg = 0;
+	X64Reg reg = FRegAllocOrder[nextReg++ % FRegAllocSize];
+	fregSpill(RI, reg);
+	return reg;
 }

 static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
@@ -1016,10 +1038,12 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 		case SExt16:
 		case BSwap32:
 		case BSwap16:
+		case Cntlzw:
 		case DupSingleToMReg:
 		case DoubleToSingle:
 		case ExpandPackedToMReg:
 		case CompactMRegToPacked:
+		case FPNeg:
 			if (thisUsed)
 				regMarkUse(RI, I, getOp1(I), 1);
 			break;
@@ -1062,6 +1086,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 		case ICmpSgt:
 		case FSMul:
 		case FSAdd:
+		case FPAdd:
+		case FPMul:
+		case FPSub:
 		case InsertDoubleInMReg:
 			if (thisUsed) {
 				regMarkUse(RI, I, getOp1(I), 1);
@@ -1237,6 +1264,17 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 			regNormalRegClear(RI, I);
 			break;
 		}
+		case Cntlzw: {
+			if (!thisUsed) break;
+			X64Reg reg = regUReg(RI, I);
+			Jit->MOV(32, R(ECX), Imm32(63));
+			Jit->BSR(32, reg, regLocForInst(RI, getOp1(I)));
+			Jit->CMOVcc(32, reg, R(ECX), CC_Z);
+			Jit->XOR(32, R(reg), Imm8(31));
+			RI.regs[reg] = I;
+			regNormalRegClear(RI, I);
+			break;
+		}
 		case And: {
 			if (!thisUsed) break;
 			regEmitBinInst(RI, I, &Jit64::AND);
@@ -1447,6 +1485,17 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 			fregNormalRegClear(RI, I);
 			break;
 		}
+		case FPNeg: {
+			if (!thisUsed) break;
+			X64Reg reg = fregFindFreeReg(RI);
+			Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
+			static const u32 GC_ALIGNED16(psSignBits[4]) =
+				{0x80000000, 0x80000000};
+			Jit->PXOR(reg, M((void*)&psSignBits));
+			RI.fregs[reg] = I;
+			fregNormalRegClear(RI, I);
+			break;
+		}
 		case LoadFReg: {
 			if (!thisUsed) break;
 			X64Reg reg = fregFindFreeReg(RI);
@@ -1488,6 +1537,33 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 			fregNormalRegClear(RI, I);
 			break;
 		}
+		case FPAdd: {
+			if (!thisUsed) break;
+			X64Reg reg = fregFindFreeReg(RI);
+			Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
+			Jit->ADDPS(reg, fregLocForInst(RI, getOp2(I)));
+			RI.fregs[reg] = I;
+			fregNormalRegClear(RI, I);
+			break;
+		}
+		case FPMul: {
+			if (!thisUsed) break;
+			X64Reg reg = fregFindFreeReg(RI);
+			Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
+			Jit->MULPS(reg, fregLocForInst(RI, getOp2(I)));
+			RI.fregs[reg] = I;
+			fregNormalRegClear(RI, I);
+			break;
+		}
+		case FPSub: {
+			if (!thisUsed) break;
+			X64Reg reg = fregFindFreeReg(RI);
+			Jit->MOVAPD(reg, fregLocForInst(RI, getOp1(I)));
+			Jit->SUBPS(reg, fregLocForInst(RI, getOp2(I)));
+			RI.fregs[reg] = I;
+			fregNormalRegClear(RI, I);
+			break;
+		}
 		case CInt32:
 		case CInt16: {
 			if (!thisUsed) break;
@@ -1579,11 +1655,19 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 			if (!thisUsed) break;
 			// FIXME: Optimize!
 			InstLoc Op = I - 1 - (*I >> 8);
-			X64Reg reg = regFindFreeReg(RI);
-			Jit->MOV(32, R(reg), regLocForInst(RI, Op));
-			RI.regs[reg] = I;
-			if (RI.IInfo[I - RI.FirstI] & 4)
-				regClearInst(RI, Op);
+			if (isFResult(*Op)) {
+				X64Reg reg = fregFindFreeReg(RI);
+				Jit->MOVAPD(reg, fregLocForInst(RI, Op));
+				RI.fregs[reg] = I;
+				if (RI.IInfo[I - RI.FirstI] & 4)
+					fregClearInst(RI, Op);
+			} else {
+				X64Reg reg = regFindFreeReg(RI);
+				Jit->MOV(32, R(reg), regLocForInst(RI, Op));
+				RI.regs[reg] = I;
+				if (RI.IInfo[I - RI.FirstI] & 4)
+					regClearInst(RI, Op);
+			}
 			break;
 		}
 		case Nop: break;
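
Note on the FoldZeroOp/FoldUOp hunks above: they give the FPRs the same folding the GPRs already had. A LoadFReg is only emitted when no IR value is cached for that register, and a StoreFReg records the stored value (so later loads fold to it) and zeroes out the previous, now-dead store instruction. A minimal sketch of the idea; MiniBuilder and its integer value ids are hypothetical stand-ins for IRBuilder and InstLoc:

    // Toy model of the reg load/store folding above (hypothetical types).
    #include <cstdio>

    struct MiniBuilder {
        int cache[32] = {};  // 0 means "no cached IR value for this freg"
        int nextId = 1;      // stands in for emitting a real IR instruction

        int loadFReg(int fr) {
            if (!cache[fr])
                cache[fr] = nextId++;  // only the first load emits anything
            return cache[fr];
        }
        int storeFReg(int val, int fr) {
            cache[fr] = val;  // later loads of fr fold to val; the real
                              // code also nops out the prior dead store
            return val;
        }
    };

    int main() {
        MiniBuilder b;
        int a = b.loadFReg(3);
        int c = b.loadFReg(3);         // folded: no second load emitted
        std::printf("%d %d\n", a, c);  // prints the same id twice
    }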
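The Cntlzw codegen above leans on an x86 quirk worth spelling out: BSR yields the index of the highest set bit but leaves the destination undefined (and sets ZF) for a zero source, so the JIT preloads 63 and substitutes it with CMOVZ; XOR with 31 then turns a bit index into a leading-zero count, and turns the 63 sentinel into the 32 that PowerPC cntlzw returns for zero. A scalar reference model (cntlzw_ref is an illustrative name):

    // Models the MOV 63 / BSR / CMOVZ / XOR 31 sequence emitted above.
    #include <cassert>

    static unsigned cntlzw_ref(unsigned x) {
        unsigned idx = 63;             // MOV ECX, 63; kept via CMOVZ if x == 0
        for (int i = 31; i >= 0; i--)  // what BSR computes for nonzero x
            if (x & (1u << i)) { idx = i; break; }
        return idx ^ 31;               // XOR reg, 31: 31 - idx, and 63 -> 32
    }

    int main() {
        assert(cntlzw_ref(0) == 32);
        assert(cntlzw_ref(1) == 31);
        assert(cntlzw_ref(0x80000000u) == 0);
    }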
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h
index 4fafd65b81..092831ed03 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h
@@ -41,6 +41,7 @@ namespace IREmitter {
 		SExt16,
 		BSwap32,
 		BSwap16,
+		Cntlzw, // Count leading zeros
 		Load8, // These loads zext
 		Load16,
 		Load32,
@@ -143,19 +144,26 @@ namespace IREmitter {
 		ForceToDouble,
 		ForceToMReg,
 #endif
+		FResult_Start,
 		LoadSingle,
 		LoadDouble,
 		LoadPaired, // This handles quantizers itself
-		StorePaired,
 		DoubleToSingle,
 		DupSingleToMReg,
+		DupSingleToPacked,
 		InsertDoubleInMReg,
 		ExpandPackedToMReg,
 		CompactMRegToPacked,
 		LoadFReg,
-		StoreFReg,
 		FSMul,
 		FSAdd,
+		FPAdd,
+		FPMul,
+		FPSub,
+		FPNeg,
+		FResult_End,
+		StorePaired,
+		StoreFReg,

 		// "Trinary" operators
 		// FIXME: Need to change representation!
@@ -189,37 +197,9 @@ namespace IREmitter {
 		return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32;
 	}

-	unsigned inline isUnary(Inst i) {
-		return getOpcode(i) >= SExt8 && getOpcode(i) <= BSwap16;
-	}
-
-	unsigned inline isBinary(Inst i) {
-		return getOpcode(i) >= Add && getOpcode(i) <= ICmpCRUnsigned;
-	}
-
-	unsigned inline isMemLoad(Inst i) {
-		return getOpcode(i) >= Load8 && getOpcode(i) <= Load32;
-	}
-
-	unsigned inline isMemStore(Inst i) {
-		return getOpcode(i) >= Store8 && getOpcode(i) <= Store32;
-	}
-
-	unsigned inline isRegLoad(Inst i) {
-		return getOpcode(i) >= LoadGReg && getOpcode(i) <= LoadCR;
-	}
-
-	unsigned inline isRegStore(Inst i) {
-		return getOpcode(i) >= LoadGReg && getOpcode(i) <= LoadCR;
-	}
-
-	unsigned inline isBranch(Inst i) {
-		return getOpcode(i) >= BranchUncond &&
-		       getOpcode(i) <= BranchCond;
-	}
-
-	unsigned inline isInterpreterFallback(Inst i) {
-		return getOpcode(i) == InterpreterFallback;
+	unsigned inline isFResult(Inst i) {
+		return getOpcode(i) > FResult_Start &&
+		       getOpcode(i) < FResult_End;
 	}

 	InstLoc inline getOp1(InstLoc i) {
@@ -360,6 +340,9 @@ namespace IREmitter {
 	InstLoc EmitSExt8(InstLoc op1) {
 		return FoldUOp(SExt8, op1);
 	}
+	InstLoc EmitCntlzw(InstLoc op1) {
+		return FoldUOp(Cntlzw, op1);
+	}
 	InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(ICmpCRSigned, op1, op2);
 	}
@@ -405,6 +388,9 @@ namespace IREmitter {
 	InstLoc EmitDupSingleToMReg(InstLoc val) {
 		return FoldUOp(DupSingleToMReg, val);
 	}
+	InstLoc EmitDupSingleToPacked(InstLoc val) {
+		return FoldUOp(DupSingleToPacked, val);
+	}
 	InstLoc EmitInsertDoubleInMReg(InstLoc val, InstLoc reg) {
 		return FoldBiOp(InsertDoubleInMReg, val, reg);
 	}
@@ -420,6 +406,18 @@ namespace IREmitter {
 	InstLoc EmitFSAdd(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(FSAdd, op1, op2);
 	}
+	InstLoc EmitFPAdd(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(FPAdd, op1, op2);
+	}
+	InstLoc EmitFPMul(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(FPMul, op1, op2);
+	}
+	InstLoc EmitFPSub(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(FPSub, op1, op2);
+	}
+	InstLoc EmitFPNeg(InstLoc op1) {
+		return FoldUOp(FPNeg, op1);
+	}
 	InstLoc EmitDoubleToSingle(InstLoc op1) {
 		return FoldUOp(DoubleToSingle, op1);
 	}
@@ -439,6 +437,8 @@ namespace IREmitter {
 		for (unsigned i = 0; i < 32; i++) {
 			GRegCache[i] = 0;
 			GRegCacheStore[i] = 0;
+			FRegCache[i] = 0;
+			FRegCacheStore[i] = 0;
 		}
 		CarryCache = 0;
 		CarryCacheStore = 0;
@@ -458,6 +458,8 @@ namespace IREmitter {
 		InstLoc curReadPtr;
 		InstLoc GRegCache[32];
 		InstLoc GRegCacheStore[32];
+		InstLoc FRegCache[32];
+		InstLoc FRegCacheStore[32];
 		InstLoc CarryCache;
 		InstLoc CarryCacheStore;
 		InstLoc CRCache[8];
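
The FResult_Start/FResult_End markers in the opcode enum are what make the new isFResult() a two-comparison range check, replacing the pile of unused per-class helpers this diff deletes; StorePaired and StoreFReg move past FResult_End precisely because they produce no FP result. The idiom in isolation, with an abbreviated, illustrative enum:

    // Sentinel-range idiom used by IR.h above (enum values illustrative).
    enum Opcode {
        FResult_Start,
        LoadSingle, LoadDouble, LoadPaired,
        FPAdd, FPMul, FPSub, FPNeg,
        FResult_End,
        StorePaired, StoreFReg,  // no FP result, so outside the range
    };

    // Two comparisons instead of a switch over every FP-producing opcode.
    inline bool isFResult(Opcode op) {
        return op > FResult_Start && op < FResult_End;
    }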
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp
index 9be273564c..7a741528b2 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp
@@ -179,39 +179,32 @@
 void Jit64::subfic(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
-	{Default(inst); return;} // turn off from debugger
-
-	INSTRUCTION_START;
-	int a = inst.RA, d = inst.RD;
-	gpr.FlushLockX(ECX);
-	gpr.Lock(a, d);
-	gpr.LoadToX64(d, a == d, true);
-	int imm = inst.SIMM_16;
-	MOV(32, R(EAX), gpr.R(a));
-	NOT(32, R(EAX));
-	ADD(32, R(EAX), Imm32(imm + 1));
-	MOV(32, gpr.R(d), R(EAX));
-	//GenerateCarry(ECX);
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
-	// This instruction has no RC flag
+	Default(inst);
+	return;
+	// FIXME: Disabling until I figure out subfcx
+	IREmitter::InstLoc val, test, c;
+	c = ibuild.EmitIntConst(inst.SIMM_16);
+	val = ibuild.EmitSub(c, ibuild.EmitLoadGReg(inst.RA));
+	ibuild.EmitStoreGReg(val, inst.RD);
+	test = ibuild.EmitICmpUgt(val, c);
+	ibuild.EmitStoreCarry(test);
 }

 void Jit64::subfcx(UGeckoInstruction inst)
 {
-	INSTRUCTION_START;
 	Default(inst);
 	return;
-	/*
-	u32 a = m_GPR[_inst.RA];
-	u32 b = m_GPR[_inst.RB];
-	m_GPR[_inst.RD] = b - a;
-	SetCarry(a == 0 || Helper_Carry(b, 0-a));
-
-	if (_inst.OE) PanicAlert("OE: subfcx");
-	if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
-	*/
+	// FIXME: Figure out what the heck is going wrong here...
+	if (inst.OE) PanicAlert("OE: subfcx");
+	IREmitter::InstLoc val, test, lhs, rhs;
+	lhs = ibuild.EmitLoadGReg(inst.RB);
+	rhs = ibuild.EmitLoadGReg(inst.RA);
+	val = ibuild.EmitSub(lhs, rhs);
+	ibuild.EmitStoreGReg(val, inst.RD);
+	test = ibuild.EmitICmpUgt(rhs, lhs);
+	ibuild.EmitStoreCarry(test);
+	if (inst.Rc)
+		ComputeRC(ibuild, val);
 }

 void Jit64::subfex(UGeckoInstruction inst)
@@ -394,33 +387,14 @@
 void Jit64::rlwnmx(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
-	{Default(inst); return;} // turn off from debugger
-
-	INSTRUCTION_START;
-	int a = inst.RA, b = inst.RB, s = inst.RS;
-	if (gpr.R(a).IsImm())
-	{
-		Default(inst);
-		return;
-	}
-
-	u32 mask = Helper_Mask(inst.MB, inst.ME);
-	gpr.FlushLockX(ECX);
-	gpr.Lock(a, b, s);
-	MOV(32, R(EAX), gpr.R(s));
-	MOV(32, R(ECX), gpr.R(b));
-	AND(32, R(ECX), Imm32(0x1f));
-	ROL(32, R(EAX), R(ECX));
-	AND(32, R(EAX), Imm32(mask));
-	MOV(32, gpr.R(a), R(EAX));
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
+	INSTRUCTION_START
+	unsigned mask = Helper_Mask(inst.MB, inst.ME);
+	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
+	val = ibuild.EmitRol(val, ibuild.EmitLoadGReg(inst.RB));
+	val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
+	ibuild.EmitStoreGReg(val, inst.RA);
 	if (inst.Rc)
-	{
-		MOV(32, R(EAX), gpr.R(a));
-		CALL((u8*)asm_routines.computeRc);
-	}
+		ComputeRC(ibuild, val);
 }

 void Jit64::negx(UGeckoInstruction inst)
@@ -509,30 +483,9 @@
 // count leading zeroes
 void Jit64::cntlzwx(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
-	{Default(inst); return;} // turn off from debugger
-
-	INSTRUCTION_START;
-	int a = inst.RA;
-	int s = inst.RS;
-	if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a)
-	{
-		Default(inst);
-		return;
-	}
-	gpr.Lock(a,s);
-	gpr.LoadToX64(a,false);
-	BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
-	FixupBranch gotone = J_CC(CC_NZ);
-	MOV(32, gpr.R(a), Imm32(63));
-	SetJumpTarget(gotone);
-	XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
-	gpr.UnlockAll();
-
+	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
+	val = ibuild.EmitCntlzw(val);
+	ibuild.EmitStoreGReg(val, inst.RA);
 	if (inst.Rc)
-	{
-		MOV(32, R(EAX), gpr.R(a));
-		CALL((u8*)asm_routines.computeRc);
-		// TODO: Check PPC manual too
-	}
+		ComputeRC(ibuild, val);
 }
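
On the disabled subfic/subfcx hunks: for subfc rd = rb - ra, the PowerPC CA bit is the carry out of ~ra + rb + 1, i.e. CA = (ra <= rb) unsigned, the complement of the borrow. The emitted EmitICmpUgt(rhs, lhs) stores (ra > rb), which reads like the borrow itself, so that polarity is one plausible culprit behind the FIXME. A reference implementation of CA under that reading of the spec (ca_subfc is an illustrative name):

    // CA for subfc as the carry out of ~ra + rb + 1 (a plausible reading
    // of the PowerPC spec, stated here as an assumption, not a fix).
    #include <cassert>
    #include <cstdint>

    static int ca_subfc(uint32_t ra, uint32_t rb) {
        uint64_t sum = (uint64_t)(uint32_t)~ra + rb + 1;
        return (sum >> 32) & 1;      // carry out of bit 31
    }

    int main() {
        assert(ca_subfc(1, 2) == 1); // 2 - 1: no borrow, CA set
        assert(ca_subfc(2, 1) == 0); // 1 - 2 wraps: borrow, CA clear
        assert(ca_subfc(0, 0) == 1); // ~0 + 0 + 1 carries out
    }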
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp
index 06f21b354d..fcd512f232 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp
@@ -88,8 +88,10 @@ void Jit64::lXz(UGeckoInstruction inst)
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
+	if (inst.OPCD & 1)
+		ibuild.EmitStoreGReg(addr, inst.RA);
 	IREmitter::InstLoc val;
-	switch (inst.OPCD)
+	switch (inst.OPCD & -2)
 	{
 	case 32: val = ibuild.EmitLoad32(addr); break; //lwz
 	case 40: val = ibuild.EmitLoad16(addr); break; //lhz
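
The lXz hunk works because PowerPC numbers each load's update form as the base opcode plus one (lwz/lwzu = 32/33, lbz/lbzu = 34/35, lhz/lhzu = 40/41): (OPCD & 1) detects "write the effective address back to RA" and (OPCD & -2) lets both forms share one switch label. A quick check of the pairing:

    // Demonstrates the base/update opcode pairing the hunk relies on.
    #include <cstdio>

    int main() {
        int opcds[] = {32, 33, 34, 35, 40, 41};
        for (int op : opcds)
            std::printf("OPCD=%d base=%d update=%d\n", op, op & -2, op & 1);
        // base collapses 33->32, 35->34, 41->40; update is the low bit
    }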
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp
index dd2f95ba25..b0b2a3cf58 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Paired.cpp
@@ -159,70 +159,31 @@
 	   add a,b,a
 	*/
-	//There's still a little bit more optimization that can be squeezed out of this
-	void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
-	{
-		fpr.Lock(d, a, b);
-
-		if (d == a)
-		{
-			fpr.LoadToX64(d, true);
-			(this->*op)(fpr.RX(d), fpr.R(b));
-		}
-		else if (d == b && reversible)
-		{
-			fpr.LoadToX64(d, true);
-			(this->*op)(fpr.RX(d), fpr.R(a));
-		}
-		else if (a != d && b != d)
-		{
-			//sources different from d, can use rather quick solution
-			fpr.LoadToX64(d, false);
-			MOVAPD(fpr.RX(d), fpr.R(a));
-			(this->*op)(fpr.RX(d), fpr.R(b));
-		}
-		else if (b != d)
-		{
-			fpr.LoadToX64(d, false);
-			MOVAPD(XMM0, fpr.R(b));
-			MOVAPD(fpr.RX(d), fpr.R(a));
-			(this->*op)(fpr.RX(d), Gen::R(XMM0));
-		}
-		else //Other combo, must use two temps :(
-		{
-			MOVAPD(XMM0, fpr.R(a));
-			MOVAPD(XMM1, fpr.R(b));
-			fpr.LoadToX64(d, false);
-			(this->*op)(XMM0, Gen::R(XMM1));
-			MOVAPD(fpr.RX(d), Gen::R(XMM0));
-		}
-		ForceSinglePrecisionP(fpr.RX(d));
-		fpr.UnlockAll();
-	}
-
 	void Jit64::ps_arith(UGeckoInstruction inst)
 	{
-		if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
-		{Default(inst); return;} // turn off from debugger
-		INSTRUCTION_START;
-		if (inst.Rc) {
+		if (inst.Rc || (inst.SUBOP5 != 21 && inst.SUBOP5 != 20 && inst.SUBOP5 != 25)) {
 			Default(inst);
 			return;
 		}
+		IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), rhs;
+		if (inst.SUBOP5 == 25)
+			rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
+		else
+			rhs = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
+		val = ibuild.EmitCompactMRegToPacked(val);
 		switch (inst.SUBOP5)
 		{
-		case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
-		case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
-		case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
-		case 23://sel
-			Default(inst);
+		case 20:
+			val = ibuild.EmitFPSub(val, rhs);
 			break;
-		case 24://res
-			Default(inst);
+		case 21:
+			val = ibuild.EmitFPAdd(val, rhs);
 			break;
-		case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
-		default:
-			_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
+		case 25:
+			val = ibuild.EmitFPMul(val, rhs);
 		}
+		val = ibuild.EmitExpandPackedToMReg(val);
+		ibuild.EmitStoreFReg(val, inst.FD);
 	}

 	void Jit64::ps_sum(UGeckoInstruction inst)
@@ -347,58 +308,37 @@
 	//TODO: add optimized cases
 	void Jit64::ps_maddXX(UGeckoInstruction inst)
 	{
-		if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
-		{Default(inst); return;} // turn off from debugger
-		INSTRUCTION_START;
-		if (inst.Rc) {
+		if (inst.Rc || (inst.SUBOP5 != 28 && inst.SUBOP5 != 29 && inst.SUBOP5 != 30)) {
 			Default(inst);
 			return;
 		}
-		int a = inst.FA;
-		int b = inst.FB;
-		int c = inst.FC;
-		int d = inst.FD;
-		fpr.Lock(a,b,c,d);
-
-		MOVAPD(XMM0, fpr.R(a));
+
+		IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA), op2, op3;
+		val = ibuild.EmitCompactMRegToPacked(val);
 		switch (inst.SUBOP5)
 		{
-		case 14: //madds0
-			MOVDDUP(XMM1, fpr.R(c));
-			MULPD(XMM0, R(XMM1));
-			ADDPD(XMM0, fpr.R(b));
-			break;
-		case 15: //madds1
-			MOVAPD(XMM1, fpr.R(c));
-			SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
-			MULPD(XMM0, R(XMM1));
-			ADDPD(XMM0, fpr.R(b));
-			break;
-		case 28: //msub
-			MULPD(XMM0, fpr.R(c));
-			SUBPD(XMM0, fpr.R(b));
-			break;
-		case 29: //madd
-			MULPD(XMM0, fpr.R(c));
-			ADDPD(XMM0, fpr.R(b));
-			break;
-		case 30: //nmsub
-			MULPD(XMM0, fpr.R(c));
-			SUBPD(XMM0, fpr.R(b));
-			XORPD(XMM0, M((void*)&psSignBits));
-			break;
-		case 31: //nmadd
-			MULPD(XMM0, fpr.R(c));
-			ADDPD(XMM0, fpr.R(b));
-			XORPD(XMM0, M((void*)&psSignBits));
-			break;
-		default:
-			_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
-			//Default(inst);
-			//fpr.UnlockAll();
-			return;
+		case 28: { //msub
+			op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
+			val = ibuild.EmitFPMul(val, op2);
+			op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
+			val = ibuild.EmitFPSub(val, op3);
+			break;
 		}
-		fpr.LoadToX64(d, false);
-		MOVAPD(fpr.RX(d), Gen::R(XMM0));
-		ForceSinglePrecisionP(fpr.RX(d));
-		fpr.UnlockAll();
+		case 29: { //madd
+			op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
+			val = ibuild.EmitFPMul(val, op2);
+			op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
+			val = ibuild.EmitFPAdd(val, op3);
+			break;
+		}
+		case 30: { //nmsub
+			op2 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FC));
+			val = ibuild.EmitFPMul(val, op2);
+			op3 = ibuild.EmitCompactMRegToPacked(ibuild.EmitLoadFReg(inst.FB));
+			val = ibuild.EmitFPSub(val, op3);
+			val = ibuild.EmitFPNeg(val);
+			break;
+		}
+		}
+		val = ibuild.EmitExpandPackedToMReg(val);
+		ibuild.EmitStoreFReg(val, inst.FD);
 	}
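
ps_maddXX now builds the three supported fused forms from the generic packed ops, with nmsub getting an explicit FPNeg since -(a*c - b) has no single IR node; madds0/madds1 and nmadd fall back to the interpreter for now. A scalar model of the per-lane arithmetic (fpmadd is an illustrative name; the SUBOP5 values are the encodings handled above):

    // Per-lane model of the FPMul/FPAdd/FPSub/FPNeg composition above.
    #include <cassert>

    static float fpmadd(int subop5, float a, float b, float c) {
        float v = a * c;            // FPMul
        switch (subop5) {
        case 28: return v - b;      // msub:  FPSub
        case 29: return v + b;      // madd:  FPAdd
        case 30: return -(v - b);   // nmsub: FPSub then FPNeg
        default: return 0.0f;       // other forms take the fallback path
        }
    }

    int main() {
        assert(fpmadd(29, 2.0f, 3.0f, 4.0f) == 11.0f);  //  2*4 + 3
        assert(fpmadd(28, 2.0f, 3.0f, 4.0f) == 5.0f);   //  2*4 - 3
        assert(fpmadd(30, 2.0f, 3.0f, 4.0f) == -5.0f);  // -(2*4 - 3)
    }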
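Relatedly, the FPNeg codegen added in IR.cpp flips the sign bit of each lane with a single PXOR against a sign-bit mask, which is why psSignBits only initializes sign bits in its low two words: after CompactMRegToPacked the pair appears to sit in the low 64 bits of the XMM register, so the zeroed upper words are never observed. A portable model of the bit trick:

    // Sign-bit XOR negation, per lane, as done by PXOR against psSignBits.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static float neg_via_xor(float f) {
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof bits);
        bits ^= 0x80000000u;         // the psSignBits constant, per lane
        std::memcpy(&f, &bits, sizeof f);
        return f;
    }

    int main() {
        assert(neg_via_xor(1.5f) == -1.5f);
        assert(neg_via_xor(-2.0f) == 2.0f);
    }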