From f4f59ea71ed69797834eb2ba6e53516cf2a6ac44 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 12:56:45 -0600
Subject: [PATCH 1/7] [AArch64] Fix ADDS/SUBS emitter functions.

These weren't emitting the flag bit, so they were emitting regular ADD
and SUB instead.
---
 Source/Core/Common/Arm64Emitter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 3d1c9801be..f07ce26bc7 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -799,7 +799,7 @@ void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Optio
 
 void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	ADD(Rd, Rn, Rm, ArithOption(Rd));
+	EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd));
 }
 
 void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
@@ -819,7 +819,7 @@ void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Optio
 
 void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	SUB(Rd, Rn, Rm, ArithOption(Rd));
+	EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd));
 }
 
 void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)

From 8dba84dd7c114ba0d63b8a6fc7ed93c3c2d85cff Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 12:58:37 -0600
Subject: [PATCH 2/7] [AArch64] Fix 8 & 16 bit loadstore indexes.

I wasn't shifting the immediate offset correctly for 8-bit and 16-bit
loadstores.
---
 Source/Core/Common/Arm64Emitter.cpp | 26 ++++++++++++++------------
 Source/Core/Common/Arm64Emitter.h   |  2 +-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index f07ce26bc7..00ed7af5cf 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -417,15 +417,17 @@ void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM
 	Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);
 }
 
-void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
+void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size)
 {
 	bool b64Bit = Is64Bit(Rt);
 	bool bVec = IsVector(Rt);
 
-	if (b64Bit)
+	if (size == 64)
 		imm >>= 3;
-	else
+	else if (size == 32)
 		imm >>= 2;
+	else if (size == 16)
+		imm >>= 1;
 
 	_assert_msg_(DYNA_REC, imm < 0, "%s(INDEX_UNSIGNED): offset must be positive", __FUNCTION__);
 	_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);
@@ -1282,7 +1284,7 @@ void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
 void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);
 	else
 		EncodeLoadStoreIndexedInst(0x0E0, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1290,7 +1292,7 @@ void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);
 	else
 		EncodeLoadStoreIndexedInst(0x0E1, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1298,7 +1300,7 @@ void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);
 	else
 		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1306,7 +1308,7 @@ void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);
 	else
 		EncodeLoadStoreIndexedInst(0x1E0, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1314,7 +1316,7 @@ void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);
 	else
 		EncodeLoadStoreIndexedInst(0x1E1, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1322,7 +1324,7 @@ void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);
 	else
 		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1330,7 +1332,7 @@ void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
 	else
 		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1338,7 +1340,7 @@ void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
 	else
 		EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
@@ -1346,7 +1348,7 @@ void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	if (type == INDEX_UNSIGNED)
-		EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm);
+		EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);
 	else
 		EncodeLoadStoreIndexedInst(0x2E2, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
 
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 02e70336da..14d691d47e 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -294,7 +294,7 @@ private:
 	void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
 	void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
 	void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
-	void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+	void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
 	void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
 	void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 	void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend);

From d2eaba5cb7d528ff6c17ce979b61bf32f72d4ddb Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 13:00:39 -0600
Subject: [PATCH 3/7] [AArch64] Minor MOVI2R improvement.

Use ZR for both source arguments when the immediate is the maximum
unsigned value. This lets the disassembler alias the instruction to MVN.
---
 Source/Core/Common/Arm64Emitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 00ed7af5cf..98db3de011 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1433,7 +1433,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
 		// Max unsigned value
 		// Set to ~ZR
 		ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
-		ORN(Rd, Rd, ZR, ArithOption(ZR, ST_LSL, 0));
+		ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));
 		return;
 	}
 

From 0a23ca9461eefff8d77a7b8a2bfe0ccdc9458497 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 13:03:01 -0600
Subject: [PATCH 4/7] [AArch64] Add MUL/MNEG instruction aliases to the emitter.
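
MUL and MNEG are aliases of MADD and MSUB with the zero register as the
accumulator: MUL Rd, Rn, Rm is MADD Rd, Rn, Rm, ZR, and MNEG is the
matching MSUB form. That is why the implementation reuses
EncodeData3SrcInst and passes SP for Ra; its register number encodes as
31, which the Ra field reads as ZR. A rough usage sketch (register
choices are illustrative only):

    ARM64XEmitter emit;
    emit.MUL(X0, X1, X2);   // X0 = X1 * X2,    assembles as MADD X0, X1, X2, XZR
    emit.MNEG(X3, X1, X2);  // X3 = -(X1 * X2), assembles as MSUB X3, X1, X2, XZR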
---
 Source/Core/Common/Arm64Emitter.cpp | 8 ++++++++
 Source/Core/Common/Arm64Emitter.h   | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 98db3de011..a9b810650b 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1022,6 +1022,14 @@ void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
 {
 	EncodeData3SrcInst(7, Rd, Rn, Rm, Ra);
 }
+void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData3SrcInst(0, Rd, Rn, Rm, SP);
+}
+void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData3SrcInst(1, Rd, Rn, Rm, SP);
+}
 
 // Logical (shifted register)
 void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 14d691d47e..1c2273e220 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -452,6 +452,8 @@ public:
 	void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
 	void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
 	void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+	void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 
 	// Logical (shifted register)
 	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);

From 2b4f1aed40c75f30e81df5900c27399cfac6370e Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 13:03:51 -0600
Subject: [PATCH 5/7] [AArch64] Minor shifted register adjustment.

A shift amount equal to the full width of the source register is an
invalid encoding. This can happen when dealing with a couple of PowerPC
instructions. The same adjustment already exists in the ARMv7 emitter.
---
 Source/Core/Common/Arm64Emitter.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 1c2273e220..cb3c60119e 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -238,9 +238,17 @@ public:
 		m_shifttype = shift_type;
 		m_type = TYPE_SHIFTEDREG;
 		if (Is64Bit(Rd))
+		{
 			m_width = WIDTH_64BIT;
+			if (shift == 64)
+				m_shift = 0;
+		}
 		else
+		{
 			m_width = WIDTH_32BIT;
+			if (shift == 32)
+				m_shift = 0;
+		}
 	}
 	TypeSpecifier GetType() const
 	{

From 5a0133c478bd9cbc5edf210ac27b9878de856439 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 13:05:55 -0600
Subject: [PATCH 6/7] [AArch64] Add a few more VFP register helpers.

Renames Is128Bit to IsQuad to line up with the other helpers.
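
The helpers rely on the ARM64Reg encoding used by this emitter: bit
0x40 marks a single, bit 0x80 a double, and both bits together (0xC0) a
quad, which is why IsQuad() compares against 0xC0 exactly rather than
just masking. A quick illustration (values per the ARM64Reg enum in
this header):

    IsSingle(S0);  // true:  (S0 & 0xC0) == 0x40
    IsDouble(D0);  // true:  (D0 & 0xC0) == 0x80
    IsQuad(Q0);    // true:  (Q0 & 0xC0) == 0xC0
    IsQuad(D0);    // false: a double only sets the 0x80 bit
    IsVector(X0);  // false: GPRs set neither bit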
---
 Source/Core/Common/Arm64Emitter.cpp | 2 +-
 Source/Core/Common/Arm64Emitter.h   | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index a9b810650b..3546638519 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -377,7 +377,7 @@ void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc,
 void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
 {
 	bool b64Bit = Is64Bit(Rt);
-	bool b128Bit = Is128Bit(Rt);
+	bool b128Bit = IsQuad(Rt);
 	bool bVec = IsVector(Rt);
 
 	if (b128Bit)
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index cb3c60119e..0933d2c1b3 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -76,7 +76,9 @@ enum ARM64Reg
 };
 
 inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
-inline bool Is128Bit(ARM64Reg reg) { return reg & 0xC0; }
+inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
+inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
+inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
 inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
 inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
 inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }

From c511ee763adadbdb05976993cecaf7bcdf06853b Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 13:42:36 -0600
Subject: [PATCH 7/7] [AArch64] Add the float emitter.

---
 Source/Core/Common/Arm64Emitter.cpp | 493 ++++++++++++++++++++++++++++
 Source/Core/Common/Arm64Emitter.h   |  92 ++++++
 2 files changed, 585 insertions(+)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 3546638519..d5e082aa2a 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1607,5 +1607,498 @@ void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
 	}
 }
 
+// Float Emitter
+void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
+{
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	u32 encoded_size = 0;
+	u32 encoded_imm = 0;
+
+	if (size == 8)
+		encoded_size = 0;
+	else if (size == 16)
+		encoded_size = 1;
+	else if (size == 32)
+		encoded_size = 2;
+	else if (size == 64)
+		encoded_size = 3;
+	else if (size == 128)
+		encoded_size = 0;
+
+	if (type == INDEX_UNSIGNED)
+	{
+		_assert_msg_(DYNA_REC, !(imm & ((size - 1) >> 3)), "%s(INDEX_UNSIGNED) immediate offset must be aligned to size!", __FUNCTION__);
+		_assert_msg_(DYNA_REC, imm >= 0, "%s(INDEX_UNSIGNED) immediate offset must be positive!", __FUNCTION__);
+		if (size == 16)
+			imm >>= 1;
+		else if (size == 32)
+			imm >>= 2;
+		else if (size == 64)
+			imm >>= 3;
+		else if (size == 128)
+			imm >>= 4;
+		encoded_imm = (imm & 0xFFF);
+	}
+	else
+	{
+		_assert_msg_(DYNA_REC, imm >= -256 && imm <= 255, "%s immediate offset must be within range of -256 to 255!", __FUNCTION__);
+		encoded_imm = (imm & 0x1FF) << 2;
+		if (type == INDEX_POST)
+			encoded_imm |= 1;
+		else
+			encoded_imm |= 3;
+	}
+
+	Write32((encoded_size << 30) | (0b1111 << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | \
+		(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64FloatEmitter::Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) | \
+		(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
+	bool quad = IsQuad(Rd);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | \
+		(Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)
+{
+	_assert_msg_(DYNA_REC, IsVector(Rd), "%s only supports VFP registers!", __FUNCTION__);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+
+	Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | \
+		(1 << 10) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
+{
+	_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
+	bool quad = IsQuad(Rd);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+
+	Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | \
+		(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn)
+{
+	_assert_msg_(DYNA_REC, !IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
+	bool quad = IsQuad(Rt);
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+
+	Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | \
+		(S << 12) | (size << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
+{
+	_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+
+	Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (opcode << 15) | \
+		(1 << 14) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
+{
+	_assert_msg_(DYNA_REC, Rn <= SP, "%s only supports GPR as source!", __FUNCTION__);
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+
+	Write32((sf << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (rmode << 19) | \
+		(opcode << 16) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm)
+{
+	bool is_double = !IsSingle(Rn);
+
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (is_double << 22) | (Rm << 16) | \
+		(op << 14) | (1 << 13) | (Rn << 5) | opcode2);
+}
+
+void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	bool is_double = !IsSingle(Rd);
+
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (is_double << 22) | (Rm << 16) | \
+		(cond << 12) | (0b11 << 10) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
+
+	bool quad = IsQuad(Rd);
+
+	u32 encoded_size = 0;
+	if (size == 16)
+		encoded_size = 1;
+	else if (size == 32)
+		encoded_size = 2;
+	else if (size == 64)
+		encoded_size = 3;
+
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((quad << 30) | (0b111 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | \
+		(1 << 11) | (Rn << 5) | Rd);
+}
+
+void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
+{
+	EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
+}
+void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
+{
+	EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);
+}
+
+// Loadstore single structure
+void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
+{
+	EmitLoadStoreSingleStructure(1, 0, 0b110, 0, size >> 4, Rt, Rn);
+}
+
+// Scalar - 2 Source
+void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	Emit2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
+}
+
+// Vector
+void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, 0, 0b00011, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(1, 1, 0b00011, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
+{
+	u32 imm5 = 0;
+
+	if (size == 8)
+	{
+		imm5 = 1;
+		imm5 |= index << 1;
+	}
+	else if (size == 16)
+	{
+		imm5 = 2;
+		imm5 |= index << 2;
+	}
+	else if (size == 32)
+	{
+		imm5 = 4;
+		imm5 |= index << 3;
+	}
+	else if (size == 64)
+	{
+		imm5 = 8;
+		imm5 |= index << 4;
+	}
+
+	EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);
+}
+void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, 2 | (size >> 6), 0b01111, Rd, Rn);
+}
+void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, size >> 6, 0b11010, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, size >> 6, 0b10111, Rd, Rn);
+}
+void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(1, size >> 6, 0b11111, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(1, size >> 6, 0b11011, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, 2 | (size >> 6), 0b01111, Rd, Rn);
+}
+void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, 2 | (size >> 6), 0b11101, Rd, Rn);
+}
+void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, 2 | (size >> 6), 0b11010, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, 0, 0b00101, Rd, Rn);
+}
+void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, 2, 0b00011, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, 1 | (size >> 4), 0, Rd, Rn);
+}
+void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, size >> 4, 0, Rd, Rn);
+}
+void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, size >> 4, 0, Rd, Rn);
+}
+
+// Move
+void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	u32 imm5 = 0;
+
+	if (size == 8)
+		imm5 = 1;
+	else if (size == 16)
+		imm5 = 2;
+	else if (size == 32)
+		imm5 = 4;
+	else if (size == 64)
+		imm5 = 8;
+
+	EmitCopy(IsQuad(Rd), 0, imm5, 0b0001, Rd, Rn);
+
+}
+void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn)
+{
+	u32 imm5 = 0;
+
+	if (size == 8)
+	{
+		imm5 = 1;
+		imm5 |= index << 1;
+	}
+	else if (size == 16)
+	{
+		imm5 = 2;
+		imm5 |= index << 2;
+	}
+	else if (size == 32)
+	{
+		imm5 = 4;
+		imm5 |= index << 3;
+	}
+	else if (size == 64)
+	{
+		imm5 = 8;
+		imm5 |= index << 4;
+	}
+
+	EmitCopy(1, 0, imm5, 0b0011, Rd, Rn);
+}
+void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2)
+{
+	u32 imm5 = 0, imm4 = 0;
+
+	if (size == 8)
+	{
+		imm5 = 1;
+		imm5 |= index1 << 1;
+		imm4 = index2;
+	}
+	else if (size == 16)
+	{
+		imm5 = 2;
+		imm5 |= index1 << 2;
+		imm4 = index2 << 1;
+	}
+	else if (size == 32)
+	{
+		imm5 = 4;
+		imm5 |= index1 << 3;
+		imm4 = index2 << 2;
+	}
+	else if (size == 64)
+	{
+		imm5 = 8;
+		imm5 |= index1 << 4;
+		imm4 = index2 << 3;
+	}
+
+	EmitCopy(1, 1, imm5, imm4, Rd, Rn);
+}
+
+// One source
+void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
+{
+	u32 dst_encoding = 0;
+	u32 src_encoding = 0;
+
+	if (size_to == 16)
+		dst_encoding = 3;
+	else if (size_to == 32)
+		dst_encoding = 0;
+	else if (size_to == 64)
+		dst_encoding = 1;
+
+	if (size_from == 16)
+		src_encoding = 3;
+	else if (size_from == 32)
+		src_encoding = 0;
+	else if (size_from == 64)
+		src_encoding = 1;
+
+	Emit1Source(0, 0, src_encoding, 0b100 | dst_encoding, Rd, Rn);
+}
+
+// Conversion between float and integer
+void ARM64FloatEmitter::FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn)
+{
+	bool sf = size == 64;
+	u32 type = 0;
+	u32 rmode = top ? 1 : 0;
+	if (size == 64)
+	{
+		if (top)
+			type = 2;
+		else
+			type = 1;
+	}
+
+	EmitConversion(sf, 0, type, rmode, IsVector(Rd) ? 0b111 : 0b110, Rd, Rn);
+}
+
+void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitCompare(0, 0, 0, 0, Rn, Rm);
+}
+void ARM64FloatEmitter::FCMP(ARM64Reg Rn)
+{
+	EmitCompare(0, 0, 0, 0b01000, Rn, (ARM64Reg)0);
+}
+void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitCompare(0, 0, 0, 0b10000, Rn, Rm);
+}
+void ARM64FloatEmitter::FCMPE(ARM64Reg Rn)
+{
+	EmitCompare(0, 0, 0, 0b11000, Rn, (ARM64Reg)0);
+}
+void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, size >> 6, 0b11100, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, 2 | (size >> 6), 0b01101, Rd, Rn);
+}
+void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(1, size >> 6, 0b11100, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, 2 | (size >> 6), 0b01100, Rd, Rn);
+}
+void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(1, 2 | (size >> 6), 0b11100, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, 2 | (size >> 6), 0b01100, Rd, Rn);
+}
+void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(1, 2 | (size >> 6), 0b01101, Rd, Rn);
+}
+void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(0, 2 | (size >> 6), 0b01110, Rd, Rn);
+}
+
+void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
+{
+	EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
+}
+
+// Permute
+void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b001, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b010, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b011, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b101, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b110, Rd, Rn, Rm);
+}
+void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitPermute(size, 0b111, Rd, Rn, Rm);
+}
+
+void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
+{
+	for (auto it : registers)
+		STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
+
+}
+void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
+{
+	for (int i = 31; i >= 0; --i)
+	{
+		if (!registers[i])
+			continue;
+
+		if (ignore_mask[i])
+			m_emit->ADD(SP, SP, 16);
+		else
+			LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
+	}
+}
+
 }
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 0933d2c1b3..f6e09e6615 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -280,6 +280,8 @@ public:
 
 class ARM64XEmitter
 {
+	friend class ARM64FloatEmitter;
+
 private:
 	u8* m_code;
 	u8* m_startcode;
@@ -577,6 +579,96 @@ public:
 	void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
 };
 
+class ARM64FloatEmitter
+{
+public:
+	ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}
+
+	void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+	void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+
+	// Loadstore single structure
+	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
+
+	// Scalar - 2 Source
+	void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+
+	// Vector
+	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
+	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void NOT(ARM64Reg Rd, ARM64Reg Rn);
+	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+
+	// Move
+	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
+	void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
+
+	// One source
+	void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
+
+	// Conversion between float and integer
+	void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
+
+	// Float comparison
+	void FCMP(ARM64Reg Rn, ARM64Reg Rm);
+	void FCMP(ARM64Reg Rn);
+	void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
+	void FCMPE(ARM64Reg Rn);
+	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+
+	// Conditional select
+	void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
+
+	// Permute
+	void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+
+	// ABI related
+	void ABI_PushRegisters(BitSet32 registers);
+	void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
+
+private:
+	ARM64XEmitter* m_emit;
+	inline void Write32(u32 value) { m_emit->Write32(value); }
+
+	// Emitting functions
+	void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+	void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
+	void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+	void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
+	void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+	void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+	void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
+	void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+};
+
 class ARM64CodeBlock : public CodeBlock
 {
 private: