mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-26 12:35:27 +00:00
x64FPURoundMode: move things around a bit
This commit is contained in:
parent
269e2aefa7
commit
1a428de189
@ -11,20 +11,20 @@ namespace FPURoundMode
|
||||
enum RoundModes
|
||||
{
|
||||
ROUND_NEAR = 0,
|
||||
ROUND_CHOP,
|
||||
ROUND_UP,
|
||||
ROUND_DOWN
|
||||
ROUND_CHOP = 1,
|
||||
ROUND_UP = 2,
|
||||
ROUND_DOWN = 3
|
||||
};
|
||||
enum PrecisionModes {
|
||||
PREC_24 = 0,
|
||||
PREC_53,
|
||||
PREC_64
|
||||
PREC_53 = 1,
|
||||
PREC_64 = 2
|
||||
};
|
||||
void SetRoundMode(u32 mode);
|
||||
void SetRoundMode(enum RoundModes mode);
|
||||
|
||||
void SetPrecisionMode(u32 mode);
|
||||
void SetPrecisionMode(enum PrecisionModes mode);
|
||||
|
||||
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode);
|
||||
void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode);
|
||||
|
||||
/*
|
||||
* There are two different flavors of float to int conversion:
|
||||
|
@ -21,13 +21,13 @@
|
||||
// Generic, do nothing
|
||||
namespace FPURoundMode
|
||||
{
|
||||
void SetRoundMode(u32 mode)
|
||||
void SetRoundMode(enum RoundModes mode)
|
||||
{
|
||||
}
|
||||
void SetPrecisionMode(u32 mode)
|
||||
void SetPrecisionMode(enum PrecisionModes mode)
|
||||
{
|
||||
}
|
||||
void SetSIMDMode(u32 mode, u32 nonIEEEMode)
|
||||
void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode)
|
||||
{
|
||||
}
|
||||
void SaveSIMDState()
|
||||
|
@ -4,30 +4,21 @@
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/FPURoundMode.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
static const unsigned short FPU_ROUND_NEAR = 0 << 10;
|
||||
static const unsigned short FPU_ROUND_DOWN = 1 << 10;
|
||||
static const unsigned short FPU_ROUND_UP = 2 << 10;
|
||||
static const unsigned short FPU_ROUND_CHOP = 3 << 10;
|
||||
static const unsigned short FPU_ROUND_MASK = 3 << 10;
|
||||
#include <xmmintrin.h>
|
||||
#ifdef _WIN32
|
||||
# include <mmintrin.h>
|
||||
#else
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
|
||||
static const u32 EXCEPTION_MASK = 0x1F80;
|
||||
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
|
||||
static const u32 DAZ = 0x40;
|
||||
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
||||
static const u32 FTZ = 0x8000;
|
||||
|
||||
namespace FPURoundMode
|
||||
{
|
||||
// Get the default SSE states here.
|
||||
static u32 saved_sse_state = _mm_getcsr();
|
||||
static const u32 default_sse_state = _mm_getcsr();
|
||||
|
||||
void SetRoundMode(u32 mode)
|
||||
void SetRoundMode(enum RoundModes mode)
|
||||
{
|
||||
// Set FPU rounding mode to mimic the PowerPC's
|
||||
#ifdef _M_IX86
|
||||
@ -42,22 +33,23 @@ namespace FPURoundMode
|
||||
};
|
||||
_set_controlfp(_MCW_RC, table[mode]);
|
||||
#else
|
||||
const unsigned short table[4] =
|
||||
const unsigned short X87_ROUND_MASK = 3 << 10;
|
||||
const unsigned short x87_rounding_table[] =
|
||||
{
|
||||
FPU_ROUND_NEAR,
|
||||
FPU_ROUND_CHOP,
|
||||
FPU_ROUND_UP,
|
||||
FPU_ROUND_DOWN
|
||||
0 << 10, // nearest
|
||||
3 << 10, // zero
|
||||
2 << 10, // +inf
|
||||
1 << 10, // -inf
|
||||
};
|
||||
unsigned short _mode;
|
||||
asm ("fstcw %0" : "=m" (_mode) : );
|
||||
_mode = (_mode & ~FPU_ROUND_MASK) | table[mode];
|
||||
asm ("fstcw %0" : "=m" (_mode));
|
||||
_mode = (_mode & ~X87_ROUND_MASK) | x87_rounding_table[mode];
|
||||
asm ("fldcw %0" : : "m" (_mode));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void SetPrecisionMode(u32 mode)
|
||||
void SetPrecisionMode(enum PrecisionModes mode)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
// sets the floating-point lib to 53-bit
|
||||
@ -66,15 +58,15 @@ namespace FPURoundMode
|
||||
#ifdef _WIN32
|
||||
_control87(_PC_53, MCW_PC);
|
||||
#else
|
||||
const unsigned short table[4] = {
|
||||
0 << 8, // FPU_PREC_24
|
||||
2 << 8, // FPU_PREC_53
|
||||
3 << 8, // FPU_PREC_64
|
||||
3 << 8, // FPU_PREC_MASK
|
||||
const unsigned short PRECISION_MASK = 3 << 8;
|
||||
const unsigned short precision_table[] = {
|
||||
0 << 8, // 24 bits
|
||||
2 << 8, // 53 bits
|
||||
3 << 8, // 64 bits
|
||||
};
|
||||
unsigned short _mode;
|
||||
asm ("fstcw %0" : "=m" (_mode));
|
||||
_mode = (_mode & ~table[3]) | table[mode];
|
||||
_mode = (_mode & ~PRECISION_MASK) | precision_table[mode];
|
||||
asm ("fldcw %0" : : "m" (_mode));
|
||||
#endif
|
||||
#else
|
||||
@ -83,24 +75,32 @@ namespace FPURoundMode
|
||||
#endif
|
||||
}
|
||||
|
||||
void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode)
|
||||
void SetSIMDMode(enum RoundModes rounding_mode, bool non_ieee_mode)
|
||||
{
|
||||
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
|
||||
const u32 EXCEPTION_MASK = 0x1F80;
|
||||
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
|
||||
const u32 DAZ = 0x40;
|
||||
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
||||
const u32 FTZ = 0x8000;
|
||||
// lookup table for FPSCR.RN-to-MXCSR.RC translation
|
||||
static const u32 roundingModeLUT[4] =
|
||||
static const u32 simd_rounding_table[] =
|
||||
{
|
||||
(0 << 13) | EXCEPTION_MASK, // nearest
|
||||
(3 << 13) | EXCEPTION_MASK, // zero
|
||||
(2 << 13) | EXCEPTION_MASK, // +inf
|
||||
(1 << 13) | EXCEPTION_MASK, // -inf
|
||||
};
|
||||
u32 csr = roundingModeLUT[roundingMode];
|
||||
u32 csr = simd_rounding_table[rounding_mode];
|
||||
|
||||
// Some initial steppings of Pentium 4 CPUs support FTZ but not DAZ.
|
||||
// They will not flush input operands but flushing outputs only is better than nothing.
|
||||
static const u32 denormalLUT[2] =
|
||||
{
|
||||
FTZ, // flush-to-zero only
|
||||
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
|
||||
};
|
||||
if (nonIEEEMode)
|
||||
if (non_ieee_mode)
|
||||
{
|
||||
csr |= denormalLUT[cpu_info.bFlushToZero];
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/Common.h"
|
||||
|
||||
#include "Common/FPURoundMode.h"
|
||||
|
||||
// --- Gekko Instruction ---
|
||||
|
||||
@ -390,7 +390,7 @@ union UReg_FPSCR
|
||||
struct
|
||||
{
|
||||
// Rounding mode (towards: nearest, zero, +inf, -inf)
|
||||
u32 RN : 2;
|
||||
enum FPURoundMode::RoundModes RN : 2;
|
||||
// Non-IEEE mode enable (aka flush-to-zero)
|
||||
u32 NI : 1;
|
||||
// Inexact exception enable
|
||||
|
Loading…
x
Reference in New Issue
Block a user