dolphin/Source/Core/Common/x64FPURoundMode.cpp
Tillmann Karras db196d8c5b Jit64[IL]: fix float conversions
Floating-point is complicated...

Some background: Denormals are floats that are too close to zero to be
stored in a normalized way (their exponent would need more bits). Since
they are stored unnormalized, they are hard to work with, even in
hardware.  That's why both PowerPC and SSE can be configured to operate
in faster but non-standard-conpliant modes in which these numbers are
simply rounded ('flushed') to zero.

Internally, we do the same as the PowerPC CPU and store all floats in
double format. This means that for loading and storing singles we need a
conversion. The PowerPC CPU does this in hardware. We previously did
this using CVTSS2SD/CVTSD2SS. Unfortunately, these instructions are
considered arithmetic and therefore flush denormals to zero if non-IEEE
mode is active. This normally wouldn't be a problem since the next
arithmetic floating-point instruction would do the same anyway but as it
turns out some games actually use floating-point instructions for
copying arbitrary data.

My idea for fixing this problem was to use x87 instructions since the
x87 FPU never supported flush-to-zero and thus doesn't mangle denormals.
However, there is one more problem to deal with: SNaNs are automatically
converted to QNaNs (by setting the most-significant bit of the
fraction). I opted to fix this by manually resetting the QNaN bit of all
values with all-1s exponent.
2014-02-12 23:12:15 +01:00

125 lines
3.1 KiB
C++

// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common.h"
#include "FPURoundMode.h"
#include "CPUDetect.h"
#ifndef _WIN32
static const unsigned short FPU_ROUND_NEAR = 0 << 10;
static const unsigned short FPU_ROUND_DOWN = 1 << 10;
static const unsigned short FPU_ROUND_UP = 2 << 10;
static const unsigned short FPU_ROUND_CHOP = 3 << 10;
static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include <xmmintrin.h>
#endif
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
static const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
static const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
static const u32 FTZ = 0x8000;
namespace FPURoundMode
{
// Get the default SSE states here.
static u32 saved_sse_state = _mm_getcsr();
static const u32 default_sse_state = _mm_getcsr();
void SetRoundMode(u32 mode)
{
// Set FPU rounding mode to mimic the PowerPC's
#ifdef _M_IX86
// This shouldn't really be needed anymore since we use SSE
#ifdef _WIN32
const int table[4] =
{
_RC_NEAR,
_RC_CHOP,
_RC_UP,
_RC_DOWN
};
_set_controlfp(_MCW_RC, table[mode]);
#else
const unsigned short table[4] =
{
FPU_ROUND_NEAR,
FPU_ROUND_CHOP,
FPU_ROUND_UP,
FPU_ROUND_DOWN
};
unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode) : );
_mode = (_mode & ~FPU_ROUND_MASK) | table[mode];
asm ("fldcw %0" : : "m" (_mode));
#endif
#endif
}
void SetPrecisionMode(u32 mode)
{
#ifdef _M_IX86
// sets the floating-point lib to 53-bit
// PowerPC has a 53bit floating pipeline only
// eg: sscanf is very sensitive
#ifdef _WIN32
_control87(_PC_53, MCW_PC);
#else
const unsigned short table[4] = {
0 << 8, // FPU_PREC_24
2 << 8, // FPU_PREC_53
3 << 8, // FPU_PREC_64
3 << 8, // FPU_PREC_MASK
};
unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode));
_mode = (_mode & ~table[3]) | table[mode];
asm ("fldcw %0" : : "m" (_mode));
#endif
#else
//x64 doesn't need this - fpu is done with SSE
//but still - set any useful sse options here
#endif
}
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
{
// lookup table for FPSCR.RN-to-MXCSR.RC translation
static const u32 roundingModeLUT[4] =
{
(0 << 13) | EXCEPTION_MASK, // nearest
(3 << 13) | EXCEPTION_MASK, // -inf
(2 << 13) | EXCEPTION_MASK, // +inf
(1 << 13) | EXCEPTION_MASK, // zero
};
u32 csr = roundingModeLUT[roundingMode];
static const u32 denormalLUT[2] =
{
FTZ, // flush-to-zero only
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
};
// FIXME: proper (?) non-IEEE mode emulation causes issues in lots of games
if (nonIEEEMode && false)
{
csr |= denormalLUT[cpu_info.bFlushToZero];
}
_mm_setcsr(csr);
}
void SaveSIMDState()
{
saved_sse_state = _mm_getcsr();
}
void LoadSIMDState()
{
_mm_setcsr(saved_sse_state);
}
void LoadDefaultSIMDState()
{
_mm_setcsr(default_sse_state);
}
}