x64FPURoundMode: move things around a bit

This commit is contained in:
Tillmann Karras 2014-02-23 14:57:31 +01:00
parent 269e2aefa7
commit 1a428de189
4 changed files with 46 additions and 46 deletions

View File

@ -11,20 +11,20 @@ namespace FPURoundMode
enum RoundModes enum RoundModes
{ {
ROUND_NEAR = 0, ROUND_NEAR = 0,
ROUND_CHOP, ROUND_CHOP = 1,
ROUND_UP, ROUND_UP = 2,
ROUND_DOWN ROUND_DOWN = 3
}; };
enum PrecisionModes { enum PrecisionModes {
PREC_24 = 0, PREC_24 = 0,
PREC_53, PREC_53 = 1,
PREC_64 PREC_64 = 2
}; };
void SetRoundMode(u32 mode); void SetRoundMode(enum RoundModes mode);
void SetPrecisionMode(u32 mode); void SetPrecisionMode(enum PrecisionModes mode);
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode); void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode);
/* /*
* There are two different flavors of float to int conversion: * There are two different flavors of float to int conversion:

View File

@ -21,13 +21,13 @@
// Generic, do nothing // Generic, do nothing
namespace FPURoundMode namespace FPURoundMode
{ {
void SetRoundMode(u32 mode) void SetRoundMode(enum RoundModes mode)
{ {
} }
void SetPrecisionMode(u32 mode) void SetPrecisionMode(enum PrecisionModes mode)
{ {
} }
void SetSIMDMode(u32 mode, u32 nonIEEEMode) void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode)
{ {
} }
void SaveSIMDState() void SaveSIMDState()

View File

@ -4,30 +4,21 @@
#include "Common/Common.h" #include "Common/Common.h"
#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/FPURoundMode.h"
#ifndef _WIN32 #ifdef _WIN32
static const unsigned short FPU_ROUND_NEAR = 0 << 10; # include <mmintrin.h>
static const unsigned short FPU_ROUND_DOWN = 1 << 10; #else
static const unsigned short FPU_ROUND_UP = 2 << 10; # include <xmmintrin.h>
static const unsigned short FPU_ROUND_CHOP = 3 << 10;
static const unsigned short FPU_ROUND_MASK = 3 << 10;
#include <xmmintrin.h>
#endif #endif
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
static const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
static const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
static const u32 FTZ = 0x8000;
namespace FPURoundMode namespace FPURoundMode
{ {
// Get the default SSE states here. // Get the default SSE states here.
static u32 saved_sse_state = _mm_getcsr(); static u32 saved_sse_state = _mm_getcsr();
static const u32 default_sse_state = _mm_getcsr(); static const u32 default_sse_state = _mm_getcsr();
void SetRoundMode(u32 mode) void SetRoundMode(enum RoundModes mode)
{ {
// Set FPU rounding mode to mimic the PowerPC's // Set FPU rounding mode to mimic the PowerPC's
#ifdef _M_IX86 #ifdef _M_IX86
@ -42,22 +33,23 @@ namespace FPURoundMode
}; };
_set_controlfp(_MCW_RC, table[mode]); _set_controlfp(_MCW_RC, table[mode]);
#else #else
const unsigned short table[4] = const unsigned short X87_ROUND_MASK = 3 << 10;
const unsigned short x87_rounding_table[] =
{ {
FPU_ROUND_NEAR, 0 << 10, // nearest
FPU_ROUND_CHOP, 3 << 10, // zero
FPU_ROUND_UP, 2 << 10, // +inf
FPU_ROUND_DOWN 1 << 10, // -inf
}; };
unsigned short _mode; unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode) : ); asm ("fstcw %0" : "=m" (_mode));
_mode = (_mode & ~FPU_ROUND_MASK) | table[mode]; _mode = (_mode & ~X87_ROUND_MASK) | x87_rounding_table[mode];
asm ("fldcw %0" : : "m" (_mode)); asm ("fldcw %0" : : "m" (_mode));
#endif #endif
#endif #endif
} }
void SetPrecisionMode(u32 mode) void SetPrecisionMode(enum PrecisionModes mode)
{ {
#ifdef _M_IX86 #ifdef _M_IX86
// sets the floating-point lib to 53-bit // sets the floating-point lib to 53-bit
@ -66,15 +58,15 @@ namespace FPURoundMode
#ifdef _WIN32 #ifdef _WIN32
_control87(_PC_53, MCW_PC); _control87(_PC_53, MCW_PC);
#else #else
const unsigned short table[4] = { const unsigned short PRECISION_MASK = 3 << 8;
0 << 8, // FPU_PREC_24 const unsigned short precision_table[] = {
2 << 8, // FPU_PREC_53 0 << 8, // 24 bits
3 << 8, // FPU_PREC_64 2 << 8, // 53 bits
3 << 8, // FPU_PREC_MASK 3 << 8, // 64 bits
}; };
unsigned short _mode; unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode)); asm ("fstcw %0" : "=m" (_mode));
_mode = (_mode & ~table[3]) | table[mode]; _mode = (_mode & ~PRECISION_MASK) | precision_table[mode];
asm ("fldcw %0" : : "m" (_mode)); asm ("fldcw %0" : : "m" (_mode));
#endif #endif
#else #else
@ -83,24 +75,32 @@ namespace FPURoundMode
#endif #endif
} }
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode) void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode)
{ {
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
const u32 FTZ = 0x8000;
// lookup table for FPSCR.RN-to-MXCSR.RC translation // lookup table for FPSCR.RN-to-MXCSR.RC translation
static const u32 roundingModeLUT[4] = static const u32 simd_rounding_table[] =
{ {
(0 << 13) | EXCEPTION_MASK, // nearest (0 << 13) | EXCEPTION_MASK, // nearest
(3 << 13) | EXCEPTION_MASK, // zero (3 << 13) | EXCEPTION_MASK, // zero
(2 << 13) | EXCEPTION_MASK, // +inf (2 << 13) | EXCEPTION_MASK, // +inf
(1 << 13) | EXCEPTION_MASK, // -inf (1 << 13) | EXCEPTION_MASK, // -inf
}; };
u32 csr = roundingModeLUT[roundingMode]; u32 csr = simd_rounding_table[rounding_mode];
// Some initial steppings of Pentium 4 CPUs support FTZ but not DAZ.
// They will not flush input operands but flushing outputs only is better than nothing.
static const u32 denormalLUT[2] = static const u32 denormalLUT[2] =
{ {
FTZ, // flush-to-zero only FTZ, // flush-to-zero only
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported) FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
}; };
if (nonIEEEMode) if (non_ieee_mode)
{ {
csr |= denormalLUT[cpu_info.bFlushToZero]; csr |= denormalLUT[cpu_info.bFlushToZero];
} }

View File

@ -8,7 +8,7 @@
#pragma once #pragma once
#include "Common/Common.h" #include "Common/Common.h"
#include "Common/FPURoundMode.h"
// --- Gekko Instruction --- // --- Gekko Instruction ---
@ -390,7 +390,7 @@ union UReg_FPSCR
struct struct
{ {
// Rounding mode (towards: nearest, zero, +inf, -inf) // Rounding mode (towards: nearest, zero, +inf, -inf)
u32 RN : 2; enum FPURoundMode::RoundModes RN : 2;
// Non-IEEE mode enable (aka flush-to-zero) // Non-IEEE mode enable (aka flush-to-zero)
u32 NI : 1; u32 NI : 1;
// Inexact exception enable // Inexact exception enable