Remove x86_32 from JitCommon.

This commit is contained in:
Ryan Houdek 2014-05-27 23:22:36 -05:00
parent f2457bbe9d
commit e357fea279
4 changed files with 2 additions and 213 deletions

View File

@ -126,7 +126,6 @@ void CommonAsmRoutines::GenQuantizedStores()
UD2(); UD2();
const u8* storePairedFloat = AlignCode4(); const u8* storePairedFloat = AlignCode4();
#if _M_X86_64
SHUFPS(XMM0, R(XMM0), 1); SHUFPS(XMM0, R(XMM0), 1);
MOVQ_xmm(M(&psTemp[0]), XMM0); MOVQ_xmm(M(&psTemp[0]), XMM0);
TEST(32, R(ECX), Imm32(0x0C000000)); TEST(32, R(ECX), Imm32(0x0C000000));
@ -140,27 +139,6 @@ void CommonAsmRoutines::GenQuantizedStores()
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
SetJumpTarget(skip_complex); SetJumpTarget(skip_complex);
RET(); RET();
#else
TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ, true);
MOVQ_xmm(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp));
BSWAP(32, EAX);
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
BSWAP(32, EAX);
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
FixupBranch arg2 = J(true);
SetJumpTarget(argh);
SHUFPS(XMM0, R(XMM0), 1);
MOVQ_xmm(M(&psTemp[0]), XMM0);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
ABI_CallFunctionR((void *)&WriteDual32, ECX);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
SetJumpTarget(arg2);
RET();
#endif
const u8* storePairedU8 = AlignCode4(); const u8* storePairedU8 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(EAX), Imm8(6));
@ -343,64 +321,24 @@ void CommonAsmRoutines::GenQuantizedLoads()
const u8* loadPairedFloatTwo = AlignCode4(); const u8* loadPairedFloatTwo = AlignCode4();
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
#if _M_X86_64
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
#endif
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
} else { } else {
#if _M_X86_64
LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0));
ROL(64, R(RCX), Imm8(32)); ROL(64, R(RCX), Imm8(32));
MOVQ_xmm(XMM0, R(RCX)); MOVQ_xmm(XMM0, R(RCX));
#else
#if 0
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
PXOR(XMM1, R(XMM1));
PSHUFLW(XMM0, R(XMM0), 0xB1);
MOVAPD(XMM1, R(XMM0));
PSRLW(XMM0, 8);
PSLLW(XMM1, 8);
POR(XMM0, R(XMM1));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, EAX);
MOV(32, M(&psTemp[0]), R(RAX));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, EAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
MOVQ_xmm(XMM0, M(&psTemp[0]));
#endif
#endif
} }
RET(); RET();
const u8* loadPairedFloatOne = AlignCode4(); const u8* loadPairedFloatOne = AlignCode4();
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
#if _M_X86_64
MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVD_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
#endif
PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
} else { } else {
#if _M_X86_64
LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
MOVD_xmm(XMM0, R(RCX)); MOVD_xmm(XMM0, R(RCX));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, EAX);
MOVD_xmm(XMM0, R(EAX));
UNPCKLPS(XMM0, M((void*)m_one));
#endif
} }
RET(); RET();

View File

@ -19,24 +19,20 @@
using namespace Gen; using namespace Gen;
#if _M_X86_64 extern u8 *trampolineCodePtr;
static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) {
u64 code_addr = (u64)codePtr; u64 code_addr = (u64)codePtr;
disassembler disasm; disassembler disasm;
char disbuf[256]; char disbuf[256];
memset(disbuf, 0, 256); memset(disbuf, 0, 256);
#if _M_X86_32
disasm.disasm32(0, code_addr, codePtr, disbuf);
#else
disasm.disasm64(0, code_addr, codePtr, disbuf); disasm.disasm64(0, code_addr, codePtr, disbuf);
#endif
PanicAlert("%s\n\n" PanicAlert("%s\n\n"
"Error encountered accessing emulated address %08x.\n" "Error encountered accessing emulated address %08x.\n"
"Culprit instruction: \n%s\nat %#" PRIx64, "Culprit instruction: \n%s\nat %#" PRIx64,
text.c_str(), emAddress, disbuf, code_addr); text.c_str(), emAddress, disbuf, code_addr);
return; return;
} }
#endif
void TrampolineCache::Init() void TrampolineCache::Init()
{ {
@ -55,7 +51,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
const u8 *trampoline = GetCodePtr(); const u8 *trampoline = GetCodePtr();
#if _M_X86_64
X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg dataReg = (X64Reg)info.regOperandReg;
@ -96,7 +91,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
ABI_PopRegistersAndAdjustStack(registersInUse, true); ABI_PopRegistersAndAdjustStack(registersInUse, true);
RET(); RET();
#endif
return trampoline; return trampoline;
} }
@ -108,7 +102,6 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
const u8 *trampoline = GetCodePtr(); const u8 *trampoline = GetCodePtr();
#if _M_X86_64
X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg addrReg = (X64Reg)info.scaledReg;
@ -158,7 +151,6 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
ABI_PopRegistersAndAdjustStack(registersInUse, true); ABI_PopRegistersAndAdjustStack(registersInUse, true);
RET(); RET();
#endif
return trampoline; return trampoline;
} }
@ -170,7 +162,6 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// that many of them in a typical program/game. // that many of them in a typical program/game.
const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{ {
#if _M_X86_64
SContext *ctx = (SContext *)ctx_void; SContext *ctx = (SContext *)ctx_void;
if (!jit->IsInCodeSpace(codePtr)) if (!jit->IsInCodeSpace(codePtr))
@ -271,7 +262,4 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
} }
return start; return start;
} }
#else
return 0;
#endif
} }

View File

@ -33,16 +33,6 @@ const int BACKPATCH_SIZE = 5;
#define CTX_R14 R14 #define CTX_R14 R14
#define CTX_R15 R15 #define CTX_R15 R15
#define CTX_RIP Rip #define CTX_RIP Rip
#elif _M_X86_32
#define CTX_EAX Eax
#define CTX_EBX Ebx
#define CTX_ECX Ecx
#define CTX_EDX Edx
#define CTX_EDI Edi
#define CTX_ESI Esi
#define CTX_EBP Ebp
#define CTX_ESP Esp
#define CTX_EIP Eip
#else #else
#error No context definition for OS #error No context definition for OS
#endif #endif
@ -68,17 +58,6 @@ const int BACKPATCH_SIZE = 5;
#define CTX_R14 __r14 #define CTX_R14 __r14
#define CTX_R15 __r15 #define CTX_R15 __r15
#define CTX_RIP __rip #define CTX_RIP __rip
#elif _M_X86_32
typedef x86_thread_state32_t SContext;
#define CTX_EAX __eax
#define CTX_EBX __ebx
#define CTX_ECX __ecx
#define CTX_EDX __edx
#define CTX_EDI __edi
#define CTX_ESI __esi
#define CTX_EBP __ebp
#define CTX_ESP __esp
#define CTX_EIP __eip
#else #else
#error No context definition for OS #error No context definition for OS
#endif #endif
@ -104,32 +83,6 @@ const int BACKPATCH_SIZE = 5;
#define CTX_R14 gregs[REG_R14] #define CTX_R14 gregs[REG_R14]
#define CTX_R15 gregs[REG_R15] #define CTX_R15 gregs[REG_R15]
#define CTX_RIP gregs[REG_RIP] #define CTX_RIP gregs[REG_RIP]
#elif _M_X86_32
#ifdef ANDROID
#include <asm/sigcontext.h>
typedef sigcontext SContext;
#define CTX_EAX eax
#define CTX_EBX ebx
#define CTX_ECX ecx
#define CTX_EDX edx
#define CTX_EDI edi
#define CTX_ESI esi
#define CTX_EBP ebp
#define CTX_ESP esp
#define CTX_EIP eip
#else
#include <ucontext.h>
typedef mcontext_t SContext;
#define CTX_EAX gregs[REG_EAX]
#define CTX_EBX gregs[REG_EBX]
#define CTX_ECX gregs[REG_ECX]
#define CTX_EDX gregs[REG_EDX]
#define CTX_EDI gregs[REG_EDI]
#define CTX_ESI gregs[REG_ESI]
#define CTX_EBP gregs[REG_EBP]
#define CTX_ESP gregs[REG_ESP]
#define CTX_EIP gregs[REG_EIP]
#endif
#elif _M_ARM_32 #elif _M_ARM_32
// Add others if required. // Add others if required.
typedef struct sigcontext SContext; typedef struct sigcontext SContext;
@ -158,16 +111,6 @@ const int BACKPATCH_SIZE = 5;
#define CTX_R14 __gregs[_REG_R14] #define CTX_R14 __gregs[_REG_R14]
#define CTX_R15 __gregs[_REG_R15] #define CTX_R15 __gregs[_REG_R15]
#define CTX_RIP __gregs[_REG_RIP] #define CTX_RIP __gregs[_REG_RIP]
#elif _M_X86_32
#define CTX_EAX __gregs[__REG_EAX]
#define CTX_EBX __gregs[__REG_EBX]
#define CTX_ECX __gregs[__REG_ECX]
#define CTX_EDX __gregs[__REG_EDX]
#define CTX_EDI __gregs[__REG_EDI]
#define CTX_ESI __gregs[__REG_ESI]
#define CTX_EBP __gregs[__REG_EBP]
#define CTX_ESP __gregs[__REG_ESP]
#define CTX_EIP __gregs[__REG_EIP]
#else #else
#error No context definition for OS #error No context definition for OS
#endif #endif
@ -192,16 +135,6 @@ const int BACKPATCH_SIZE = 5;
#define CTX_R14 mc_r14 #define CTX_R14 mc_r14
#define CTX_R15 mc_r15 #define CTX_R15 mc_r15
#define CTX_RIP mc_rip #define CTX_RIP mc_rip
#elif _M_X86_32
#define CTX_EAX mc_eax
#define CTX_EBX mc_ebx
#define CTX_ECX mc_ecx
#define CTX_EDX mc_edx
#define CTX_EDI mc_edi
#define CTX_ESI mc_esi
#define CTX_EBP mc_ebp
#define CTX_ESP mc_esp
#define CTX_EIP mc_eip
#else #else
#error No context definition for OS #error No context definition for OS
#endif #endif
@ -233,8 +166,6 @@ static inline u64 *ContextRN(SContext* ctx, int n)
}; };
return (u64 *) ((char *) ctx + offsets[n]); return (u64 *) ((char *) ctx + offsets[n]);
} }
#elif _M_X86_32
#define CTX_PC CTX_EIP
#endif #endif
class TrampolineCache : public Gen::X64CodeBlock class TrampolineCache : public Gen::X64CodeBlock

View File

@ -41,12 +41,7 @@ void EmuCodeBlock::SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{ {
#if _M_X86_64
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#else
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#endif
if (accessSize == 32) if (accessSize == 32)
{ {
BSWAP(32, reg_value); BSWAP(32, reg_value);
@ -68,18 +63,12 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
{ {
#if _M_X86_64
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#else
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#endif
} }
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend) u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend)
{ {
u8 *result; u8 *result;
#if _M_X86_64
if (opAddress.IsSimpleReg()) if (opAddress.IsSimpleReg())
{ {
// Deal with potential wraparound. (This is just a heuristic, and it would // Deal with potential wraparound. (This is just a heuristic, and it would
@ -109,27 +98,6 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
else else
MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset)); MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
} }
#else
if (opAddress.IsImm())
{
result = GetWritableCodePtr();
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
else
MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
}
else
{
if (!opAddress.IsSimpleReg(reg_value))
MOV(32, R(reg_value), opAddress);
AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
else
MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
}
#endif
switch (accessSize) switch (accessSize)
{ {
@ -281,7 +249,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
{ {
registersInUse &= ~(1 << RAX | 1 << reg_value); registersInUse &= ~(1 << RAX | 1 << reg_value);
} }
#if _M_X86_64
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
!opAddress.IsImm() && !opAddress.IsImm() &&
@ -296,7 +263,6 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
registersInUseAtLoc[mov] = registersInUse; registersInUseAtLoc[mov] = registersInUse;
} }
else else
#endif
{ {
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
@ -411,7 +377,6 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
if (accessSize == 8 && reg_value >= 4) { if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
} }
#if _M_X86_64
result = GetWritableCodePtr(); result = GetWritableCodePtr();
OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset);
if (swap) if (swap)
@ -431,15 +396,6 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
{ {
MOV(accessSize, dest, R(reg_value)); MOV(accessSize, dest, R(reg_value));
} }
#else
if (swap)
{
BSWAP(accessSize, reg_value);
}
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#endif
return result; return result;
} }
@ -447,7 +403,6 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{ {
registersInUse &= ~(1 << RAX); registersInUse &= ~(1 << RAX);
#if _M_X86_64
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))
@ -468,7 +423,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
registersInUseAtLoc[mov] = registersInUse; registersInUseAtLoc[mov] = registersInUse;
return; return;
} }
#endif
if (offset) if (offset)
ADD(32, R(reg_addr), Imm32((u32)offset)); ADD(32, R(reg_addr), Imm32((u32)offset));
@ -517,17 +471,10 @@ void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offs
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap) void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
{ {
#if _M_X86_64
if (swap) if (swap)
SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
else else
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg));
#else
if (swap)
SwapAndStore(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), arg);
else
MOV(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), R(arg));
#endif
} }
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
@ -553,17 +500,10 @@ static u64 GC_ALIGNED16(temp64);
static const float GC_ALIGNED16(m_zero[]) = { 0.0f, 0.0f, 0.0f, 0.0f }; static const float GC_ALIGNED16(m_zero[]) = { 0.0f, 0.0f, 0.0f, 0.0f };
#if _M_X86_64
static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000); static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0, 0x0000000000400000);
static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000); static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi64x(0, 0x000000007f800000);
static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0, 0x0008000000000000); static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0, 0x0008000000000000);
static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000); static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000);
#else
static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi32(0, 0, 0x00000000, 0x00400000);
static const __m128i GC_ALIGNED16(single_exponent) = _mm_set_epi32(0, 0, 0x00000000, 0x7f800000);
static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi32(0, 0, 0x00080000, 0x00000000);
static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi32(0, 0, 0x7ff00000, 0x00000000);
#endif
// Since the following float conversion functions are used in non-arithmetic PPC float instructions, // Since the following float conversion functions are used in non-arithmetic PPC float instructions,
// they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs. // they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs.
@ -578,19 +518,11 @@ static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi32(0, 0, 0x7ff00
//#define MORE_ACCURATE_DOUBLETOSINGLE //#define MORE_ACCURATE_DOUBLETOSINGLE
#ifdef MORE_ACCURATE_DOUBLETOSINGLE #ifdef MORE_ACCURATE_DOUBLETOSINGLE
#if _M_X86_64
static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff); static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff);
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000); static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000);
static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000); static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000);
static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000); static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000);
static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000); static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000);
#else
static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi32(0, 0, 0x000fffff, 0xffffffff);
static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi32(0, 0, 0x80000000, 0x00000000);
static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi32(0, 0, 0x00100000, 0x00000000);
static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi32(0, 0, 0xc0000000, 0x00000000);
static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi32(0, 0, 0x07ffffff, 0xe0000000);
#endif
// This is the same algorithm used in the interpreter (and actual hardware) // This is the same algorithm used in the interpreter (and actual hardware)
// The documentation states that the conversion of a double with an outside the // The documentation states that the conversion of a double with an outside the