diff --git a/Source/Core/Common/Src/ArmCPUDetect.cpp b/Source/Core/Common/Src/ArmCPUDetect.cpp index ca277ade02..a04c88834f 100644 --- a/Source/Core/Common/Src/ArmCPUDetect.cpp +++ b/Source/Core/Common/Src/ArmCPUDetect.cpp @@ -17,9 +17,10 @@ #include "Common.h" #include "CPUDetect.h" -#include "StringUtil.h" #include "FileUtil.h" +// Only Linux platforms have /proc/cpuinfo +#if !defined(BLACKBERRY) && !defined(IOS) && !defined(__SYMBIAN32__) const char procfile[] = "/proc/cpuinfo"; char *GetCPUString() @@ -33,7 +34,7 @@ char *GetCPUString() auto const fp = file.GetHandle(); if (!fp) return 0; - + while (fgets(buf, sizeof(buf), fp)) { if (strncmp(buf, marker, sizeof(marker) - 1)) @@ -42,6 +43,7 @@ char *GetCPUString() cpu_string = strndup(cpu_string, strlen(cpu_string) - 1); // Strip the newline break; } + return cpu_string; } @@ -91,14 +93,13 @@ unsigned short GetCPUPart() break; } return part; - } bool CheckCPUFeature(const char *feature) { const char marker[] = "Features\t: "; char buf[1024]; - File::IOFile file(procfile, "r"); auto const fp = file.GetHandle(); if (!fp) @@ -117,10 +118,18 @@ bool CheckCPUFeature(const char *feature) token = strtok(NULL, " "); } } + return false; } +#endif + int GetCoreCount() { +#ifdef __SYMBIAN32__ + return 1; +#elif defined(BLACKBERRY) || defined(IOS) + return 2; +#else const char marker[] = "processor\t: "; int cores = 0; char buf[1024]; @@ -129,14 +138,16 @@ int GetCoreCount() auto const fp = file.GetHandle(); if (!fp) return 0; - + while (fgets(buf, sizeof(buf), fp)) { if (strncmp(buf, marker, sizeof(marker) - 1)) continue; ++cores; } + return cores; +#endif } CPUInfo cpu_info; @@ -153,12 +164,58 @@ void CPUInfo::Detect() HTT = false; OS64bit = false; CPU64bit = false; - Mode64bit = false; + Mode64bit = false; vendor = VENDOR_ARM; - + // Get the information about the CPU - strncpy(cpu_string, GetCPUString(), sizeof(cpu_string)); num_cores = GetCoreCount(); +#if defined(__SYMBIAN32__) || defined(BLACKBERRY) || defined(IOS) + bool isVFP3 = false; + bool isVFP4 = false; +#ifdef IOS + isVFP3 = true; + // Check for Swift arch (VFP4) + #ifdef __ARM_ARCH_7S__ + isVFP4 = true; + #endif // #ifdef __ARM_ARCH_7S__ +#elif defined(BLACKBERRY) + isVFP3 = true; + const char cpuInfoPath[] = "/pps/services/hw_info/inventory"; + const char marker[] = "Processor_Name::"; + const char qcCPU[] = "MSM"; + char buf[1024]; + FILE* fp; + if ((fp = fopen(cpuInfoPath, "r"))) + { + while (fgets(buf, sizeof(buf), fp)) + { + if (strncmp(buf, marker, sizeof(marker) - 1)) + continue; + if (strncmp(buf + sizeof(marker) - 1, qcCPU, sizeof(qcCPU) - 1) == 0) + isVFP4 = true; + break; + } + fclose(fp); + } +#endif + // Hardcode this for now + bSwp = true; + bHalf = true; + bThumb = false; + bFastMult = true; + bVFP = true; + bEDSP = true; + bThumbEE = isVFP3; + bNEON = isVFP3; + bVFPv3 = isVFP3; + bTLS = true; + bVFPv4 = isVFP4; + bIDIVa = isVFP4; + bIDIVt = isVFP4; + bFP = false; + bASIMD = false; +#else + strncpy(cpu_string, GetCPUString(), sizeof(cpu_string)); bSwp = CheckCPUFeature("swp"); bHalf = CheckCPUFeature("half"); bThumb = CheckCPUFeature("thumb"); @@ -172,16 +229,15 @@ void CPUInfo::Detect() bVFPv4 = CheckCPUFeature("vfpv4"); bIDIVa = CheckCPUFeature("idiva"); bIDIVt = CheckCPUFeature("idivt"); - // Qualcomm Krait supports IDIVA but it doesn't report it. Check for krait. if (GetCPUImplementer() == 0x51 && GetCPUPart() == 0x6F) // Krait(300) is 0x6F, Scorpion is 0x4D - bIDIVa = bIDIVt = true; - - // These two are ARMv8 specific. 
+ bIDIVa = bIDIVt = true; + // These two require ARMv8 or higher bFP = CheckCPUFeature("fp"); bASIMD = CheckCPUFeature("asimd"); - - +#endif +// On Android, we build a separate library for ARMv7 so this is fine. +// TODO: Check for ARMv7 on other platforms. #if defined(__ARM_ARCH_7A__) bArmV7 = true; #else @@ -193,11 +249,14 @@ std::string CPUInfo::Summarize() { std::string sum; +#if defined(BLACKBERRY) || defined(IOS) || defined(__SYMBIAN32__) + sum = StringFromFormat("%i cores", num_cores); +#else if (num_cores == 1) sum = StringFromFormat("%s, %i core", cpu_string, num_cores); else sum = StringFromFormat("%s, %i cores", cpu_string, num_cores); - +#endif if (bSwp) sum += ", SWP"; if (bHalf) sum += ", Half"; if (bThumb) sum += ", Thumb"; diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp index 1b20a1784e..a59dc53b6f 100644 --- a/Source/Core/Common/Src/ArmEmitter.cpp +++ b/Source/Core/Common/Src/ArmEmitter.cpp @@ -86,7 +86,7 @@ bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated) Operand2 AssumeMakeOperand2(u32 imm) { Operand2 op2; bool result = TryMakeOperand2(imm, op2); - _assert_msg_(DYNA_REC, result, "Could not make assumed Operand2."); + _dbg_assert_msg_(DYNA_REC, result, "Could not make assumed Operand2."); return op2; } @@ -117,14 +117,33 @@ bool ARMXEmitter::TrySetValue_TwoOp(ARMReg reg, u32 val) return true; } -void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg) +void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate) { union {float f; u32 u;} conv; - conv.f = val; + conv.f = negate ? -val : val; + // Try moving directly first if the mantissa is empty + if (cpu_info.bVFPv3 && ((conv.u & 0x7FFFF) == 0)) + { + // VFP Encoding for Imms: <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19) + bool bit6 = (conv.u & 0x40000000) == 0x40000000; + bool canEncode = true; + for (u32 mask = 0x20000000; mask >= 0x2000000; mask >>= 1) + { + if (((conv.u & mask) == mask) == bit6) + canEncode = false; + } + if (canEncode) + { + u32 imm8 = (conv.u & 0x80000000) >> 24; // sign bit + imm8 |= (!bit6 << 6); + imm8 |= (conv.u & 0x1F80000) >> 19; + VMOV(dest, IMM(imm8)); + return; + } + } MOVI2R(tempReg, conv.u); VMOV(dest, tempReg); - // TODO: VMOV an IMM directly if possible - // Otherwise, use a literal pool and VLDR directly (+- 1020) + // Otherwise, it is possible to use a literal pool and VLDR directly (+/- 1020) } void ARMXEmitter::ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) @@ -246,8 +265,12 @@ void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize) } void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) { - MOVI2R(reg, (u32)(func)); - BL(reg); + if (BLInRange(func)) { + BL(func); + } else { + MOVI2R(reg, (u32)(func)); + BL(reg); + } } void ARMXEmitter::SetCodePtr(u8 *ptr) @@ -369,7 +392,7 @@ FixupBranch ARMXEmitter::B_CC(CCFlags Cond) void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr) { s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -33554432 + _dbg_assert_msg_(DYNA_REC, distance > -33554432 && distance <= 33554432, "B_CC out of range (%p calls %p)", code, fnptr); @@ -388,7 +411,7 @@ FixupBranch ARMXEmitter::BL_CC(CCFlags Cond) void ARMXEmitter::SetJumpTarget(FixupBranch const &branch) { s32 distance = (s32(code) - 8) - (s32)branch.ptr; - _assert_msg_(DYNA_REC, distance > -33554432 + _dbg_assert_msg_(DYNA_REC, distance > -33554432 && distance <= 33554432, "SetJumpTarget out of range (%p calls %p)", code, branch.ptr); @@ -402,7 +425,7 @@ 
void ARMXEmitter::SetJumpTarget(FixupBranch const &branch) void ARMXEmitter::B (const void *fnptr) { s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -33554432 + _dbg_assert_msg_(DYNA_REC, distance > -33554432 && distance <= 33554432, "B out of range (%p calls %p)", code, fnptr); @@ -414,10 +437,18 @@ void ARMXEmitter::B(ARMReg src) Write32(condition | 0x12FFF10 | src); } +bool ARMXEmitter::BLInRange(const void *fnptr) { + s32 distance = (s32)fnptr - (s32(code) + 8); + if (distance <= -33554432 || distance > 33554432) + return false; + else + return true; +} + void ARMXEmitter::BL(const void *fnptr) { s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -33554432 + _dbg_assert_msg_(DYNA_REC, distance > -33554432 && distance <= 33554432, "BL out of range (%p calls %p)", code, fnptr); Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF)); @@ -555,7 +586,7 @@ void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, b } } if (op == -1) - _assert_msg_(DYNA_REC, false, "%s not yet support %d", InstNames[Op], Rm.GetType()); + _dbg_assert_msg_(DYNA_REC, false, "%s does not yet support %d", InstNames[Op], Rm.GetType()); Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data); } @@ -652,11 +683,11 @@ void ARMXEmitter::RBIT(ARMReg dest, ARMReg src) } void ARMXEmitter::REV (ARMReg dest, ARMReg src) { - Write32(condition | (0x6B << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src); + Write32(condition | (0x6BF << 16) | (dest << 12) | (0xF3 << 4) | src); } void ARMXEmitter::REV16(ARMReg dest, ARMReg src) { - Write32(condition | (0x3DF << 16) | (dest << 12) | (0xFD << 4) | src); + Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src); } void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2) @@ -677,7 +708,7 @@ void ARMXEmitter::LDREX(ARMReg dest, ARMReg base) } void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op) { - _assert_msg_(DYNA_REC, (result != base && result != op), "STREX dest can't be other two registers"); + _dbg_assert_msg_(DYNA_REC, (result != base && result != op), "STREX dest can't be other two registers"); Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op); } void ARMXEmitter::DMB () @@ -730,7 +761,7 @@ void ARMXEmitter::WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 Rm, bool R bool SignedLoad = false; if (op == -1) - _assert_msg_(DYNA_REC, false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType()); + _dbg_assert_msg_(DYNA_REC, false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType()); switch (Op) { @@ -854,10 +885,25 @@ ARMReg ARMXEmitter::SubBase(ARMReg Reg) } // NEON Specific +void ARMXEmitter::VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VABD(float)"); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VABD(float) when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + // Gets encoded as a double register + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ + | ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \ + | ((Vm & 0x10) << 2) | (Vm & 0xF)); +} void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)"); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use 
VADD(integer) when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)"); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VADD(integer) when CPU doesn't support it"); bool register_quad = Vd >= Q0; @@ -868,13 +914,13 @@ void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \ - | ((Vm & 0x10) << 1) | (Vm & 0xF)); + | ((Vm & 0x10) << 1) | (Vm & 0xF)); } void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to VSUB(integer)"); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VSUB(integer) when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to VSUB(integer)"); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VSUB(integer) when CPU doesn't support it"); // Gets encoded as a double register Vd = SubBase(Vd); @@ -883,36 +929,37 @@ void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \ | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \ - | ((Vm & 0x10) << 2) | (Vm & 0xF)); + | ((Vm & 0x10) << 2) | (Vm & 0xF)); } -// VFP Specific -struct VFPEnc -{ - s16 opc1; - s16 opc2; -}; // Double/single, Neon -const VFPEnc VFPOps[][2] = { +extern const VFPEnc VFPOps[16][2] = { {{0xE0, 0xA0}, {0x20, 0xD1}}, // 0: VMLA - {{0xE0, 0xA4}, {0x22, 0xD1}}, // 1: VMLS - {{0xE3, 0xA0}, {0x20, 0xD0}}, // 2: VADD - {{0xE3, 0xA4}, {0x22, 0xD0}}, // 3: VSUB - {{0xE2, 0xA0}, {0x30, 0xD1}}, // 4: VMUL - {{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 5: VABS(Vn(0x0) used for encoding) - {{0xE8, 0xA0}, { -1, -1}}, // 6: VDIV - {{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 7: VNEG(Vn(0x1) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 8: VSQRT (Vn(0x1) used for encoding) - {{0xEB, 0xA4}, { -1, -1}}, // 9: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 10: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{ -1, -1}, {0x3B, 0x30}}, // 11: VABSi + {{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA + {{0xE0, 0xA4}, {0x22, 0xD1}}, // 2: VMLS + {{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS + {{0xE3, 0xA0}, {0x20, 0xD0}}, // 4: VADD + {{0xE3, 0xA4}, {0x22, 0xD0}}, // 5: VSUB + {{0xE2, 0xA0}, {0x30, 0xD1}}, // 6: VMUL + {{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL + {{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding) + {{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV + {{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding) + {{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding) + {{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding) + {{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 
1 : 0) used for encoding) + {{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi }; -const char *VFPOpNames[] = { + +extern const char *VFPOpNames[16] = { "VMLA", + "VNMLA", "VMLS", + "VNMLS", "VADD", "VSUB", "VMUL", + "VNMUL", "VABS", "VDIV", "VNEG", @@ -966,6 +1013,7 @@ u32 ARMXEmitter::EncodeVm(ARMReg Vm) else return ((Reg & 0x1) << 5) | (Reg >> 1); } + void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm) { bool quad_reg = Vd >= Q0; @@ -973,39 +1021,42 @@ void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm) VFPEnc enc = VFPOps[Op][quad_reg]; if (enc.opc1 == -1 && enc.opc2 == -1) - _assert_msg_(DYNA_REC, false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP"); + _dbg_assert_msg_(DYNA_REC, false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP"); u32 VdEnc = EncodeVd(Vd); u32 VnEnc = EncodeVn(Vn); u32 VmEnc = EncodeVm(Vm); u32 cond = quad_reg ? (0xF << 28) : condition; - + Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc); } void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); } -void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); } -void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); } -void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); } -void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); } -void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(5, Vd, D0, Vm); } -void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); } -void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(7, Vd, D1, Vm); } -void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D1, Vm); } -void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(9, Vd, D4, Vm); } -void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D4, Vm); } -void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(9, Vd, D5, D0); } -void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(10, Vd, D5, D0); } +void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); } +void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); } +void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); } +void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); } +void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); } +void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); } +void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); } +void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); } +void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); } +void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); } +void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); } +void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); } +void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); } +void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); } +void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); } void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) { - _assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, 
"Passed Invalid dest register to VLDR"); - _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); + _dbg_assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); + _dbg_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); bool Add = offset >= 0 ? true : false; u32 imm = abs(offset); - _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough"); + _dbg_assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough"); - if (imm & 0xC03) + if (imm & 0xC03) ERROR_LOG(DYNA_REC, "VLDR: Bad offset %08x", imm); bool single_reg = Dest < D0; @@ -1015,25 +1066,25 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) if (single_reg) { Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \ - | ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2)); + | ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2)); } else { Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ - | ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2)); + | ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2)); } } void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset) { - _assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR"); - _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR"); + _dbg_assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR"); + _dbg_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR"); bool Add = offset >= 0 ? true : false; u32 imm = abs(offset); - _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough"); + _dbg_assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough"); - if (imm & 0xC03) + if (imm & 0xC03) ERROR_LOG(DYNA_REC, "VSTR: Bad offset %08x", imm); bool single_reg = Src < D0; @@ -1043,12 +1094,12 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset) if (single_reg) { Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \ - | ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2)); + | ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2)); } else { Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \ - | ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2)); + | ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2)); } } @@ -1063,15 +1114,20 @@ void ARMXEmitter::VMSR(ARMReg Rt) { } // VFP and ASIMD +void ARMXEmitter::VMOV(ARMReg Dest, Operand2 op2) +{ + _dbg_assert_msg_(DYNA_REC, cpu_info.bVFPv3, "VMOV #imm requires VFPv3"); + Write32(condition | (0xEB << 20) | EncodeVd(Dest) | (0xA << 8) | op2.Imm8VFP()); +} void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high) { - _assert_msg_(DYNA_REC, Src < S0, "This VMOV doesn't support SRC other than ARM Reg"); - _assert_msg_(DYNA_REC, Dest >= D0, "This VMOV doesn't support DEST other than VFP"); + _dbg_assert_msg_(DYNA_REC, Src < S0, "This VMOV doesn't support SRC other than ARM Reg"); + _dbg_assert_msg_(DYNA_REC, Dest >= D0, "This VMOV doesn't support DEST other than VFP"); Dest = SubBase(Dest); Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \ - | (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4)); + | (0xB << 8) | ((Dest & 0x10) << 3) | (1 << 4)); } void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) @@ -1091,7 +1147,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) else { 
// Move 64bit from Arm reg - _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM to NEON"); + _dbg_assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM to NEON"); return; } } @@ -1111,14 +1167,14 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) else { // Move 64bit To Arm reg - _assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM From NEON"); + _dbg_assert_msg_(DYNA_REC, false, "This VMOV doesn't support moving 64bit ARM From NEON"); return; } } else { // Move Arm reg to Arm reg - _assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers"); + _dbg_assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers"); } } // Moving NEON registers @@ -1127,7 +1183,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) bool Single = DestSize == 1; bool Quad = DestSize == 4; - _assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes"); + _dbg_assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes"); Dest = SubBase(Dest); Src = SubBase(Src); @@ -1142,7 +1198,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) // Double and quad if (Quad) { - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use quad registers when you don't support ASIMD."); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use quad registers when you don't support ASIMD."); // Gets encoded as a Double register Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \ | ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \ @@ -1160,13 +1216,39 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags) { bool single_reg = (Dest < D0) && (Source < D0); + bool single_double = !single_reg && (Source < D0 || Dest < D0); + bool single_to_double = Source < D0; int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0; int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 
1 : 0; Dest = SubBase(Dest); Source = SubBase(Source); - if (single_reg) + if (single_double) { + // S32<->F64 + if ((flags & TO_INT) || (flags & TO_FLOAT)) + { + if (single_to_double) + { + Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) \ + | ((Dest & 0xF) << 12) | (op << 7) | (0x2D << 6) | ((Source & 0x1) << 5) | (Source >> 1)); + } else { + Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ + | ((Dest & 0x1E) << 11) | (op << 7) | (0x2D << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); + } + } + // F32<->F64 + else { + if (single_to_double) + { + Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | (0x7 << 16) \ + | ((Dest & 0xF) << 12) | (0x2F << 6) | ((Source & 0x1) << 5) | (Source >> 1)); + } else { + Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | (0x7 << 16) \ + | ((Dest & 0x1E) << 11) | (0x2B << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); + } + } + } else if (single_reg) { Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ | ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1)); } else { diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h index b3fab39c40..0bfe679d35 100644 --- a/Source/Core/Common/Src/ArmEmitter.h +++ b/Source/Core/Common/Src/ArmEmitter.h @@ -365,7 +365,7 @@ private: u32 EncodeVn(ARMReg Vn); u32 EncodeVm(ARMReg Vm); void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm); - + void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); // New Ops @@ -431,6 +431,7 @@ public: void B (ARMReg src); void BL(const void *fnptr); void BL(ARMReg src); + bool BLInRange(const void *fnptr); void PUSH(const int num, ...); void POP(const int num, ...); @@ -530,6 +531,7 @@ public: // Subtracts the base from the register to give us the real one ARMReg SubBase(ARMReg Reg); // NEON Only + void VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); @@ -541,6 +543,10 @@ public: // Compares against zero void VCMP(ARMReg Vd); void VCMPE(ARMReg Vd); + + void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSQRT(ARMReg Vd, ARMReg Vm); @@ -552,6 +558,7 @@ public: void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMOV(ARMReg Dest, Operand2 op2); void VMOV(ARMReg Dest, ARMReg Src, bool high); void VMOV(ARMReg Dest, ARMReg Src); void VCVT(ARMReg Dest, ARMReg Src, int flags); @@ -564,7 +571,7 @@ public: // Wrapper around MOVT/MOVW with fallbacks. void MOVI2R(ARMReg reg, u32 val, bool optimize = true); - void MOVI2F(ARMReg dest, float val, ARMReg tempReg); + void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false); void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); @@ -624,7 +631,11 @@ public: // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()). 
void WriteProtect() { - WriteProtectMemory(region, region_size, true); + WriteProtectMemory(region, region_size, true); + } + void UnWriteProtect() + { + UnWriteProtectMemory(region, region_size, false); } void ResetCodePtr() @@ -636,8 +647,24 @@ public: { return region_size - (GetCodePtr() - region); } + + u8 *GetBasePtr() { + return region; + } + + size_t GetOffset(u8 *ptr) { + return ptr - region; + } }; +// VFP Specific +struct VFPEnc { + s16 opc1; + s16 opc2; +}; +extern const VFPEnc VFPOps[16][2]; +extern const char *VFPOpNames[16]; + } // namespace #endif // _DOLPHIN_INTEL_CODEGEN_
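
Note on the MOVI2F change above: the following is a minimal standalone sketch of the VFPv3 VMOV-immediate test that the new fast path performs; CanEncodeVFPImm is a hypothetical helper name used here for illustration only, not part of the patch. A single-precision value is directly encodable when its low 19 mantissa bits are zero and bits 29..25 each equal the complement of bit 30, matching the <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19) layout cited in the diff.

#include <cstdint>
#include <cstring>

// Hypothetical mirror of the encodability check inside MOVI2F (illustration only).
// Layout of an encodable float: sign = imm8<7>, bit 30 = Not(imm8<6>),
// bits 29..25 = imm8<6> repeated, bits 24..19 = imm8<5:0>, bits 18..0 = zero.
static bool CanEncodeVFPImm(float val, uint32_t* imm8)
{
    uint32_t u;
    std::memcpy(&u, &val, sizeof(u));         // bit-exact view of the float
    if ((u & 0x7FFFF) != 0)
        return false;                         // low 19 mantissa bits must be zero
    const bool bit6 = (u & 0x40000000) != 0;  // bit 30 of the float
    for (uint32_t mask = 0x20000000; mask >= 0x2000000; mask >>= 1)
        if (((u & mask) != 0) == bit6)
            return false;                     // bits 29..25 must be Not(bit 30)
    *imm8 = ((u & 0x80000000) >> 24)          // sign        -> imm8<7>
          | (static_cast<uint32_t>(!bit6) << 6) // Not(bit 30) -> imm8<6>
          | ((u & 0x01F80000) >> 19);         // bits 24..19 -> imm8<5:0>
    return true;                              // e.g. 1.0f -> 0x70, -2.0f -> 0x80
}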
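
On BLInRange and QuickCallFunction: BL encodes a signed 24-bit word offset relative to PC+8, so a call can only reach 2^25 = 33554432 bytes in either direction, which is where the bound in the asserts and in BLInRange comes from; QuickCallFunction now emits a single BL when the target is reachable and only falls back to loading the address into a register otherwise. A small sketch of that arithmetic, with InBLRange a hypothetical free-standing mirror of the member function:

#include <cstdint>

// BL's offset field is 24 bits, sign-extended and shifted left by 2,
// so the reachable window around PC+8 is 2^25 bytes in each direction.
static_assert((1u << 25) == 33554432u, "BL reach is +/-32 MiB");

// Hypothetical mirror of ARMXEmitter::BLInRange (illustration only).
inline bool InBLRange(intptr_t target, intptr_t pc)
{
    intptr_t distance = target - (pc + 8);
    return distance > -33554432 && distance <= 33554432;
}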