mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-12-16 04:09:39 +00:00
Merge e1ee80a70e into ed2fe134aa
This commit is contained in:
commit
4dfd3591cf
@ -72,8 +72,6 @@ add_library(common
|
||||
FileUtil.h
|
||||
FixedSizeQueue.h
|
||||
Flag.h
|
||||
FloatUtils.cpp
|
||||
FloatUtils.h
|
||||
Functional.h
|
||||
FormatUtil.h
|
||||
FPURoundMode.h
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
// Copyright 2018 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
namespace Common
|
||||
{
|
||||
// Yields the signaling-NaN value that the standard library reports for the
// floating-point type T.
template <typename T>
constexpr T SNANConstant()
{
  using Limits = std::numeric_limits<T>;
  return Limits::signaling_NaN();
}
|
||||
|
||||
// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
|
||||
static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
|
||||
static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
|
||||
static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
|
||||
static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
|
||||
static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
|
||||
static constexpr int DOUBLE_EXP_WIDTH = 11;
|
||||
static constexpr int DOUBLE_FRAC_WIDTH = 52;
|
||||
|
||||
static constexpr u32 FLOAT_SIGN = 0x80000000;
|
||||
static constexpr u32 FLOAT_EXP = 0x7F800000;
|
||||
static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
|
||||
static constexpr u32 FLOAT_ZERO = 0x00000000;
|
||||
static constexpr int FLOAT_EXP_WIDTH = 8;
|
||||
static constexpr int FLOAT_FRAC_WIDTH = 23;
|
||||
|
||||
inline bool IsQNAN(double d)
|
||||
{
|
||||
const u64 i = std::bit_cast<u64>(d);
|
||||
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_QBIT) == DOUBLE_QBIT);
|
||||
}
|
||||
|
||||
inline bool IsSNAN(double d)
|
||||
{
|
||||
const u64 i = std::bit_cast<u64>(d);
|
||||
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
|
||||
((i & DOUBLE_QBIT) == DOUBLE_ZERO);
|
||||
}
|
||||
|
||||
inline float FlushToZero(float f)
|
||||
{
|
||||
u32 i = std::bit_cast<u32>(f);
|
||||
if ((i & FLOAT_EXP) == 0)
|
||||
{
|
||||
// Turn into signed zero
|
||||
i &= FLOAT_SIGN;
|
||||
}
|
||||
return std::bit_cast<float>(i);
|
||||
}
|
||||
|
||||
inline double FlushToZero(double d)
|
||||
{
|
||||
u64 i = std::bit_cast<u64>(d);
|
||||
if ((i & DOUBLE_EXP) == 0)
|
||||
{
|
||||
// Turn into signed zero
|
||||
i &= DOUBLE_SIGN;
|
||||
}
|
||||
return std::bit_cast<double>(i);
|
||||
}
|
||||
|
||||
inline double MakeQuiet(double d)
|
||||
{
|
||||
const u64 integral = std::bit_cast<u64>(d) | Common::DOUBLE_QBIT;
|
||||
|
||||
return std::bit_cast<double>(integral);
|
||||
}
|
||||
|
||||
// Floating-point class codes laid out the PowerPC way.
// NOTE(review): presumably these are the values produced by ClassifyDouble /
// ClassifyFloat - confirm against their implementation.
enum PPCFpClass
{
  PPC_FPCLASS_QNAN = 0x11,  // quiet NaN
  PPC_FPCLASS_NINF = 0x09,  // negative infinity
  PPC_FPCLASS_NN = 0x08,    // negative normalized
  PPC_FPCLASS_ND = 0x18,    // negative denormalized
  PPC_FPCLASS_NZ = 0x12,    // negative zero
  PPC_FPCLASS_PZ = 0x02,    // positive zero
  PPC_FPCLASS_PD = 0x14,    // positive denormalized
  PPC_FPCLASS_PN = 0x04,    // positive normalized
  PPC_FPCLASS_PINF = 0x05,  // positive infinity
};
|
||||
|
||||
// Uses PowerPC conventions for the return value, so it can be easily
|
||||
// used directly in CPU emulation.
|
||||
u32 ClassifyDouble(double dvalue);
|
||||
u32 ClassifyFloat(float fvalue);
|
||||
|
||||
// One row of the frsqrte_expected / fres_expected lookup tables.
struct BaseAndDec
{
  // Starting value for the table segment.
  int m_base;
  // Per-step adjustment that is scaled by the position inside the segment.
  int m_dec;
};
|
||||
extern const std::array<BaseAndDec, 32> frsqrte_expected;
|
||||
extern const std::array<BaseAndDec, 32> fres_expected;
|
||||
|
||||
// PowerPC approximation algorithms
|
||||
double ApproximateReciprocalSquareRoot(double val);
|
||||
double ApproximateReciprocal(double val);
|
||||
|
||||
} // namespace Common
|
||||
@ -123,6 +123,8 @@ add_library(core
|
||||
FifoPlayer/FifoPlayer.h
|
||||
FifoPlayer/FifoRecorder.cpp
|
||||
FifoPlayer/FifoRecorder.h
|
||||
FloatUtils.cpp
|
||||
FloatUtils.h
|
||||
FreeLookConfig.cpp
|
||||
FreeLookConfig.h
|
||||
FreeLookManager.cpp
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
// Copyright 2018 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
|
||||
#include <bit>
|
||||
#include <cmath>
|
||||
|
||||
namespace Common
|
||||
namespace Core
|
||||
{
|
||||
u32 ClassifyDouble(double dvalue)
|
||||
{
|
||||
@ -98,7 +98,7 @@ double ApproximateReciprocalSquareRoot(double val)
|
||||
}
|
||||
|
||||
// Special case NaN-ish numbers
|
||||
if (exponent == (0x7FFLL << 52))
|
||||
if (exponent == DOUBLE_EXP)
|
||||
{
|
||||
if (mantissa == 0)
|
||||
{
|
||||
@ -123,7 +123,7 @@ double ApproximateReciprocalSquareRoot(double val)
|
||||
exponent -= 1LL << 52;
|
||||
mantissa <<= 1;
|
||||
} while (!(mantissa & (1LL << 52)));
|
||||
mantissa &= (1LL << 52) - 1;
|
||||
mantissa &= DOUBLE_FRAC;
|
||||
exponent += 1LL << 52;
|
||||
}
|
||||
|
||||
@ -139,51 +139,70 @@ double ApproximateReciprocalSquareRoot(double val)
|
||||
}
|
||||
|
||||
const std::array<BaseAndDec, 32> fres_expected = {{
|
||||
{0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
|
||||
{0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
|
||||
{0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
|
||||
{0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
|
||||
{0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
|
||||
{0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
|
||||
{0x041c00, 0x108}, {0x020c00, 0x106},
|
||||
{0xfff000, -0x3e1}, {0xf07000, -0x3a7}, {0xe1d400, -0x371}, {0xd41000, -0x340},
|
||||
{0xc71000, -0x313}, {0xbac400, -0x2ea}, {0xaf2000, -0x2c4}, {0xa41000, -0x2a0},
|
||||
{0x999000, -0x27f}, {0x8f9400, -0x261}, {0x861000, -0x245}, {0x7d0000, -0x22a},
|
||||
{0x745800, -0x212}, {0x6c1000, -0x1fb}, {0x642800, -0x1e5}, {0x5c9400, -0x1d1},
|
||||
{0x555000, -0x1be}, {0x4e5800, -0x1ac}, {0x47ac00, -0x19b}, {0x413c00, -0x18b},
|
||||
{0x3b1000, -0x17c}, {0x352000, -0x16e}, {0x2f5c00, -0x15b}, {0x29f000, -0x15b},
|
||||
{0x248800, -0x143}, {0x1f7c00, -0x143}, {0x1a7000, -0x12d}, {0x15bc00, -0x12d},
|
||||
{0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106},
|
||||
}};
|
||||
|
||||
// Used by fres and ps_res.
|
||||
double ApproximateReciprocal(double val)
|
||||
double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
|
||||
{
|
||||
s64 integral = std::bit_cast<s64>(val);
|
||||
const s64 mantissa = integral & ((1LL << 52) - 1);
|
||||
const s64 sign = integral & (1ULL << 63);
|
||||
s64 exponent = integral & (0x7FFLL << 52);
|
||||
const u64 integral = std::bit_cast<u64>(val);
|
||||
|
||||
// Convert into a float when possible
|
||||
const u64 signless = integral & ~DOUBLE_SIGN;
|
||||
const u32 mantissa =
|
||||
static_cast<u32>((integral & DOUBLE_FRAC) >> (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH));
|
||||
const u32 sign = static_cast<u32>((integral >> 32) & FLOAT_SIGN);
|
||||
const s32 exponent = static_cast<s32>((integral & DOUBLE_EXP) >> DOUBLE_FRAC_WIDTH) - 0x380;
|
||||
|
||||
// The largest floats possible just return 0
|
||||
const u64 huge_float = fpscr.NI ? 0x47d0000000000000ULL : 0x4940000000000000ULL;
|
||||
|
||||
// Special case 0
|
||||
if (mantissa == 0 && exponent == 0)
|
||||
if (signless == 0)
|
||||
return std::copysign(std::numeric_limits<double>::infinity(), val);
|
||||
|
||||
// Special case NaN-ish numbers
|
||||
if (exponent == (0x7FFLL << 52))
|
||||
// Special case huge or NaN-ish numbers
|
||||
if (signless >= huge_float)
|
||||
{
|
||||
if (mantissa == 0)
|
||||
if (!std::isnan(val))
|
||||
return std::copysign(0.0, val);
|
||||
return MakeQuiet(val);
|
||||
}
|
||||
|
||||
// Special case small inputs
|
||||
if (exponent < (895LL << 52))
|
||||
if (exponent < -1)
|
||||
return std::copysign(std::numeric_limits<float>::max(), val);
|
||||
|
||||
// Special case large inputs
|
||||
if (exponent >= (1149LL << 52))
|
||||
return std::copysign(0.0, val);
|
||||
const s32 new_exponent = 253 - exponent;
|
||||
|
||||
exponent = (0x7FDLL << 52) - exponent;
|
||||
|
||||
const int i = static_cast<int>(mantissa >> 37);
|
||||
const u32 i = static_cast<u32>(mantissa >> 8);
|
||||
const auto& entry = fres_expected[i / 1024];
|
||||
integral = sign | exponent;
|
||||
integral |= static_cast<s64>(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
|
||||
const u32 new_mantissa = static_cast<u32>(entry.m_base + entry.m_dec * (i % 1024)) / 2;
|
||||
|
||||
return std::bit_cast<double>(integral);
|
||||
u32 result = sign | (static_cast<u32>(new_exponent) << FLOAT_FRAC_WIDTH) | new_mantissa;
|
||||
if (new_exponent <= 0)
|
||||
{
|
||||
// Result is subnormal so format it properly!
|
||||
if (fpscr.NI)
|
||||
{
|
||||
// Flush to 0 if inexact
|
||||
result = sign;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Shift by the exponent amount
|
||||
u32 shift = 1 + static_cast<u32>(-new_exponent);
|
||||
result = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift);
|
||||
}
|
||||
}
|
||||
return static_cast<double>(std::bit_cast<float>(result));
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Core
|
||||
191
Source/Core/Core/FloatUtils.h
Normal file
191
Source/Core/Core/FloatUtils.h
Normal file
@ -0,0 +1,191 @@
|
||||
// Copyright 2018 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
|
||||
namespace Core
|
||||
{
|
||||
// Yields the signaling-NaN value that the standard library reports for the
// floating-point type T.
template <typename T>
constexpr T SNANConstant()
{
  using Limits = std::numeric_limits<T>;
  return Limits::signaling_NaN();
}
|
||||
|
||||
// The most significant bit of the fraction is an is-quiet bit on all architectures we care about.
|
||||
static constexpr u64 DOUBLE_QBIT = 0x0008000000000000ULL;
|
||||
static constexpr u64 DOUBLE_SIGN = 0x8000000000000000ULL;
|
||||
static constexpr u64 DOUBLE_EXP = 0x7FF0000000000000ULL;
|
||||
static constexpr u64 DOUBLE_FRAC = 0x000FFFFFFFFFFFFFULL;
|
||||
static constexpr u64 DOUBLE_ZERO = 0x0000000000000000ULL;
|
||||
static constexpr int DOUBLE_EXP_WIDTH = 11;
|
||||
static constexpr int DOUBLE_FRAC_WIDTH = 52;
|
||||
|
||||
static constexpr u32 FLOAT_SIGN = 0x80000000;
|
||||
static constexpr u32 FLOAT_EXP = 0x7F800000;
|
||||
static constexpr u32 FLOAT_FRAC = 0x007FFFFF;
|
||||
static constexpr u32 FLOAT_ZERO = 0x00000000;
|
||||
static constexpr int FLOAT_EXP_WIDTH = 8;
|
||||
static constexpr int FLOAT_FRAC_WIDTH = 23;
|
||||
|
||||
inline bool IsQNAN(double d)
|
||||
{
|
||||
const u64 i = std::bit_cast<u64>(d);
|
||||
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_QBIT) == DOUBLE_QBIT);
|
||||
}
|
||||
|
||||
inline bool IsSNAN(double d)
|
||||
{
|
||||
const u64 i = std::bit_cast<u64>(d);
|
||||
return ((i & DOUBLE_EXP) == DOUBLE_EXP) && ((i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
|
||||
((i & DOUBLE_QBIT) == DOUBLE_ZERO);
|
||||
}
|
||||
|
||||
inline float FlushToZero(float f)
|
||||
{
|
||||
u32 i = std::bit_cast<u32>(f);
|
||||
if ((i & FLOAT_EXP) == 0)
|
||||
{
|
||||
// Turn into signed zero
|
||||
i &= FLOAT_SIGN;
|
||||
}
|
||||
return std::bit_cast<float>(i);
|
||||
}
|
||||
|
||||
inline double FlushToZero(double d)
|
||||
{
|
||||
u64 i = std::bit_cast<u64>(d);
|
||||
if ((i & DOUBLE_EXP) == 0)
|
||||
{
|
||||
// Turn into signed zero
|
||||
i &= DOUBLE_SIGN;
|
||||
}
|
||||
return std::bit_cast<double>(i);
|
||||
}
|
||||
|
||||
inline double MakeQuiet(double d)
|
||||
{
|
||||
const u64 integral = std::bit_cast<u64>(d) | DOUBLE_QBIT;
|
||||
|
||||
return std::bit_cast<double>(integral);
|
||||
}
|
||||
|
||||
// Floating-point class codes laid out the PowerPC way.
// NOTE(review): presumably these are the values produced by ClassifyDouble /
// ClassifyFloat - confirm against their implementation.
enum PPCFpClass
{
  PPC_FPCLASS_QNAN = 0x11,  // quiet NaN
  PPC_FPCLASS_NINF = 0x09,  // negative infinity
  PPC_FPCLASS_NN = 0x08,    // negative normalized
  PPC_FPCLASS_ND = 0x18,    // negative denormalized
  PPC_FPCLASS_NZ = 0x12,    // negative zero
  PPC_FPCLASS_PZ = 0x02,    // positive zero
  PPC_FPCLASS_PD = 0x14,    // positive denormalized
  PPC_FPCLASS_PN = 0x04,    // positive normalized
  PPC_FPCLASS_PINF = 0x05,  // positive infinity
};
|
||||
|
||||
// Uses PowerPC conventions for the return value, so it can be easily
|
||||
// used directly in CPU emulation.
|
||||
u32 ClassifyDouble(double dvalue);
|
||||
u32 ClassifyFloat(float fvalue);
|
||||
|
||||
// One row of the frsqrte_expected / fres_expected lookup tables.
struct BaseAndDec
{
  // Starting value for the table segment.
  int m_base;
  // Per-step adjustment that is scaled by the position inside the segment.
  int m_dec;
};
|
||||
extern const std::array<BaseAndDec, 32> frsqrte_expected;
|
||||
extern const std::array<BaseAndDec, 32> fres_expected;
|
||||
|
||||
// PowerPC approximation algorithms
|
||||
double ApproximateReciprocalSquareRoot(double val);
|
||||
double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
|
||||
|
||||
// Instructions which move data without performing operations round a bit weirdly
|
||||
// Specifically, they round the mantissa to be like that of a 32-bit float,
|
||||
// going as far as to focus on the rounding mode, but never actually care about
|
||||
// making sure the exponent becomes 32-bit
|
||||
// Either this, or they'll truncate the mantissa down, which will always happen to
|
||||
// PS1 OR PS0 in ps_rsqrte
|
||||
// Clears the low fraction bits of a raw double bit pattern that have no
// counterpart in a single-precision fraction. Sign, exponent and the upper
// fraction bits are returned unchanged; no rounding is performed.
inline u64 TruncateMantissaBits(u64 bits)
{
  // Truncation can be done by simply cutting off the mantissa bits that don't
  // exist in a single precision float
  constexpr int dropped_bits = DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH;
  static_assert(dropped_bits > 0 && dropped_bits < 64);

  // Shift a 64-bit one: a plain `1` would be shifted at `int` width and only
  // widened afterwards, which is undefined once the count reaches 31.
  constexpr u64 dropped_mask = (u64{1} << dropped_bits) - 1;
  return bits & ~dropped_mask;
}
|
||||
|
||||
inline double TruncateMantissa(double value)
|
||||
{
|
||||
u64 bits = std::bit_cast<u64>(value);
|
||||
u64 trunc_bits = TruncateMantissaBits(bits);
|
||||
return std::bit_cast<double>(trunc_bits);
|
||||
}
|
||||
|
||||
// Rounds the fraction of a raw double bit pattern to single-precision width
// while carrying the original exponent field over. The caller is expected to
// pass a finite value; non-finite inputs are not filtered out here.
inline u64 RoundMantissaBitsFinite(u64 bits)
{
  // Stand-in exponent that is swapped in for the duration of the conversion.
  const u64 placeholder_exp = 0x4000000000000000ull;
  const u64 source_exp = bits & DOUBLE_EXP;

  // Only the mantissa should be rounded, so the real exponent is taken out of
  // the picture first. The double -> float -> double round trip then rounds in
  // whatever direction the host is currently set to, which is assumed to
  // mirror the emulated CPU's rounding mode.
  const u64 rebased = (bits & (DOUBLE_SIGN | DOUBLE_FRAC)) | placeholder_exp;
  const float narrowed = static_cast<float>(std::bit_cast<double>(rebased));
  const u64 rounded = std::bit_cast<u64>(static_cast<double>(narrowed));

  if (source_exp != 0)
  {
    // Swap the exponents arithmetically rather than with masks: rounding may
    // have carried out of the mantissa into the exponent (raising it by one),
    // and that carry has to survive into the result.
    return (rounded - placeholder_exp) + source_exp;
  }

  // The exponent isn't incremented for double subnormals, so the exponent
  // field is simply cleared again.
  return rounded & ~DOUBLE_EXP;
}
|
||||
|
||||
// Reduces the fraction of a raw double bit pattern to single-precision width:
// patterns with a saturated exponent (infinities and NaNs) are truncated,
// everything else is rounded.
inline u64 RoundMantissaBits(u64 bits)
{
  const bool non_finite = (bits & DOUBLE_EXP) == DOUBLE_EXP;

  // For infinite and NaN values, the mantissa is simply truncated
  return non_finite ? TruncateMantissaBits(bits) : RoundMantissaBitsFinite(bits);
}
|
||||
|
||||
inline double RoundMantissaFinite(double value)
|
||||
{
|
||||
// This function is only ever used by ps_sum1, because
|
||||
// for some reason it assumes that ps0 should be rounded with
|
||||
// finite values rather than checking if they might be infinite
|
||||
u64 bits = std::bit_cast<u64>(value);
|
||||
u64 rounded_bits = RoundMantissaBitsFinite(bits);
|
||||
return std::bit_cast<double>(rounded_bits);
|
||||
}
|
||||
|
||||
inline double RoundMantissa(double value)
|
||||
{
|
||||
// This function just bitcasts the double value parameter so it
|
||||
// can be used in the more common function that operates on the raw bits
|
||||
u64 bits = std::bit_cast<u64>(value);
|
||||
u64 rounded_bits = RoundMantissaBits(bits);
|
||||
return std::bit_cast<double>(rounded_bits);
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
@ -9,7 +9,7 @@
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
@ -59,11 +59,11 @@ inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
|
||||
|
||||
constexpr u64 smallest_normal_single = 0x3810000000000000;
|
||||
const u64 value_without_sign =
|
||||
std::bit_cast<u64>(value) & (Common::DOUBLE_EXP | Common::DOUBLE_FRAC);
|
||||
std::bit_cast<u64>(value) & (Core::DOUBLE_EXP | Core::DOUBLE_FRAC);
|
||||
|
||||
if (value_without_sign < smallest_normal_single)
|
||||
{
|
||||
const u64 flushed_double = std::bit_cast<u64>(value) & Common::DOUBLE_SIGN;
|
||||
const u64 flushed_double = std::bit_cast<u64>(value) & Core::DOUBLE_SIGN;
|
||||
const u32 flushed_single = static_cast<u32>(flushed_double >> 32);
|
||||
return std::bit_cast<float>(flushed_single);
|
||||
}
|
||||
@ -74,7 +74,7 @@ inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
|
||||
float x = static_cast<float>(value);
|
||||
if (!cpu_info.bFlushToZero && fpscr.NI)
|
||||
{
|
||||
x = Common::FlushToZero(x);
|
||||
x = Core::FlushToZero(x);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
@ -83,7 +83,7 @@ inline double ForceDouble(const UReg_FPSCR& fpscr, double d)
|
||||
{
|
||||
if (!cpu_info.bFlushToZero && fpscr.NI)
|
||||
{
|
||||
d = Common::FlushToZero(d);
|
||||
d = Core::FlushToZero(d);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
@ -92,8 +92,8 @@ inline double Force25Bit(double d)
|
||||
{
|
||||
u64 integral = std::bit_cast<u64>(d);
|
||||
|
||||
u64 exponent = integral & Common::DOUBLE_EXP;
|
||||
u64 fraction = integral & Common::DOUBLE_FRAC;
|
||||
u64 exponent = integral & Core::DOUBLE_EXP;
|
||||
u64 fraction = integral & Core::DOUBLE_FRAC;
|
||||
|
||||
if (exponent == 0 && fraction != 0)
|
||||
{
|
||||
@ -108,7 +108,7 @@ inline double Force25Bit(double d)
|
||||
// the fraction is "normal"
|
||||
// That is to say shifting it until the MSB of the fraction
|
||||
// would escape into the exponent
|
||||
u32 shift = std::countl_zero(fraction) - (63 - Common::DOUBLE_FRAC_WIDTH);
|
||||
u32 shift = std::countl_zero(fraction) - (63 - Core::DOUBLE_FRAC_WIDTH);
|
||||
keep_mask >>= shift;
|
||||
round >>= shift;
|
||||
|
||||
@ -146,7 +146,7 @@ inline FPResult NI_mul(PowerPC::PowerPCState& ppc_state, double a, double b)
|
||||
|
||||
if (std::isnan(result.value))
|
||||
{
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b))
|
||||
{
|
||||
result.SetException(ppc_state, FPSCR_VXSNAN);
|
||||
}
|
||||
@ -155,12 +155,12 @@ inline FPResult NI_mul(PowerPC::PowerPCState& ppc_state, double a, double b)
|
||||
|
||||
if (std::isnan(a))
|
||||
{
|
||||
result.value = Common::MakeQuiet(a);
|
||||
result.value = Core::MakeQuiet(a);
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(b))
|
||||
{
|
||||
result.value = Common::MakeQuiet(b);
|
||||
result.value = Core::MakeQuiet(b);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -186,19 +186,19 @@ inline FPResult NI_div(PowerPC::PowerPCState& ppc_state, double a, double b)
|
||||
}
|
||||
else if (std::isnan(result.value))
|
||||
{
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b))
|
||||
result.SetException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (std::isnan(a))
|
||||
{
|
||||
result.value = Common::MakeQuiet(a);
|
||||
result.value = Core::MakeQuiet(a);
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(b))
|
||||
{
|
||||
result.value = Common::MakeQuiet(b);
|
||||
result.value = Core::MakeQuiet(b);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -220,19 +220,19 @@ inline FPResult NI_add(PowerPC::PowerPCState& ppc_state, double a, double b)
|
||||
|
||||
if (std::isnan(result.value))
|
||||
{
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b))
|
||||
result.SetException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (std::isnan(a))
|
||||
{
|
||||
result.value = Common::MakeQuiet(a);
|
||||
result.value = Core::MakeQuiet(a);
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(b))
|
||||
{
|
||||
result.value = Common::MakeQuiet(b);
|
||||
result.value = Core::MakeQuiet(b);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -253,19 +253,19 @@ inline FPResult NI_sub(PowerPC::PowerPCState& ppc_state, double a, double b)
|
||||
|
||||
if (std::isnan(result.value))
|
||||
{
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b))
|
||||
result.SetException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (std::isnan(a))
|
||||
{
|
||||
result.value = Common::MakeQuiet(a);
|
||||
result.value = Core::MakeQuiet(a);
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(b))
|
||||
{
|
||||
result.value = Common::MakeQuiet(b);
|
||||
result.value = Core::MakeQuiet(b);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -500,24 +500,24 @@ inline FPResult NI_madd_msub(PowerPC::PowerPCState& ppc_state, double a, double
|
||||
|
||||
if (std::isnan(result.value))
|
||||
{
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b) || Common::IsSNAN(c))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b) || Core::IsSNAN(c))
|
||||
result.SetException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (std::isnan(a))
|
||||
{
|
||||
result.value = Common::MakeQuiet(a);
|
||||
result.value = Core::MakeQuiet(a);
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(b))
|
||||
{
|
||||
result.value = Common::MakeQuiet(b); // !
|
||||
result.value = Core::MakeQuiet(b); // !
|
||||
return result;
|
||||
}
|
||||
if (std::isnan(c))
|
||||
{
|
||||
result.value = Common::MakeQuiet(c);
|
||||
result.value = Core::MakeQuiet(c);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -549,13 +549,13 @@ inline u32 ConvertToSingle(u64 x)
|
||||
{
|
||||
const u32 exp = u32((x >> 52) & 0x7ff);
|
||||
|
||||
if (exp > 896 || (x & ~Common::DOUBLE_SIGN) == 0)
|
||||
if (exp > 896 || (x & ~Core::DOUBLE_SIGN) == 0)
|
||||
{
|
||||
return u32(((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff));
|
||||
}
|
||||
else if (exp >= 874)
|
||||
{
|
||||
u32 t = u32(0x80000000 | ((x & Common::DOUBLE_FRAC) >> 21));
|
||||
u32 t = u32(0x80000000 | ((x & Core::DOUBLE_FRAC) >> 21));
|
||||
t = t >> (905 - exp);
|
||||
t |= u32((x >> 32) & 0x80000000);
|
||||
return t;
|
||||
@ -573,7 +573,7 @@ inline u32 ConvertToSingleFTZ(u64 x)
|
||||
{
|
||||
const u32 exp = u32((x >> 52) & 0x7ff);
|
||||
|
||||
if (exp > 896 || (x & ~Common::DOUBLE_SIGN) == 0)
|
||||
if (exp > 896 || (x & ~Core::DOUBLE_SIGN) == 0)
|
||||
{
|
||||
return u32(((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff));
|
||||
}
|
||||
|
||||
@ -7,8 +7,8 @@
|
||||
#include <limits>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/Unreachable.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
@ -86,7 +86,7 @@ void ConvertToInteger(PowerPC::PowerPCState& ppc_state, UGeckoInstruction inst,
|
||||
|
||||
if (std::isnan(b))
|
||||
{
|
||||
if (Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(b))
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
value = 0x80000000;
|
||||
@ -153,7 +153,7 @@ void Interpreter::Helper_FloatCompareOrdered(PowerPC::PowerPCState& ppc_state,
|
||||
if (std::isnan(fa) || std::isnan(fb))
|
||||
{
|
||||
compare_result = FPCC::FU;
|
||||
if (Common::IsSNAN(fa) || Common::IsSNAN(fb))
|
||||
if (Core::IsSNAN(fa) || Core::IsSNAN(fb))
|
||||
{
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
if (ppc_state.fpscr.VE == 0)
|
||||
@ -196,7 +196,7 @@ void Interpreter::Helper_FloatCompareUnordered(PowerPC::PowerPCState& ppc_state,
|
||||
{
|
||||
compare_result = FPCC::FU;
|
||||
|
||||
if (Common::IsSNAN(fa) || Common::IsSNAN(fb))
|
||||
if (Core::IsSNAN(fa) || Core::IsSNAN(fb))
|
||||
{
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
}
|
||||
@ -317,7 +317,7 @@ void Interpreter::frspx(Interpreter& interpreter, UGeckoInstruction inst) // ro
|
||||
|
||||
if (std::isnan(b))
|
||||
{
|
||||
const bool is_snan = Common::IsSNAN(b);
|
||||
const bool is_snan = Core::IsSNAN(b);
|
||||
|
||||
if (is_snan)
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
@ -517,7 +517,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const double b = ppc_state.ps[inst.FB].PS0AsDouble();
|
||||
|
||||
const auto compute_result = [&ppc_state, inst](double value) {
|
||||
const double result = Common::ApproximateReciprocal(value);
|
||||
const double result = Core::ApproximateReciprocal(ppc_state.fpscr, value);
|
||||
ppc_state.ps[inst.FD].Fill(result);
|
||||
ppc_state.UpdateFPRFSingle(float(result));
|
||||
};
|
||||
@ -530,7 +530,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
if (ppc_state.fpscr.ZE == 0)
|
||||
compute_result(b);
|
||||
}
|
||||
else if (Common::IsSNAN(b))
|
||||
else if (Core::IsSNAN(b))
|
||||
{
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
@ -556,7 +556,7 @@ void Interpreter::frsqrtex(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const double b = ppc_state.ps[inst.FB].PS0AsDouble();
|
||||
|
||||
const auto compute_result = [&ppc_state, inst](double value) {
|
||||
const double result = Common::ApproximateReciprocalSquareRoot(value);
|
||||
const double result = Core::ApproximateReciprocalSquareRoot(value);
|
||||
ppc_state.ps[inst.FD].SetPS0(result);
|
||||
ppc_state.UpdateFPRFDouble(result);
|
||||
};
|
||||
@ -577,7 +577,7 @@ void Interpreter::frsqrtex(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
if (ppc_state.fpscr.ZE == 0)
|
||||
compute_result(b);
|
||||
}
|
||||
else if (Common::IsSNAN(b))
|
||||
else if (Core::IsSNAN(b))
|
||||
{
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
@ -3,10 +3,11 @@
|
||||
|
||||
#include "Core/PowerPC/Interpreter/Interpreter.h"
|
||||
|
||||
#include <bit>
|
||||
#include <cmath>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
@ -18,8 +19,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
const auto& c = ppc_state.ps[inst.FC];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
|
||||
a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
|
||||
double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
|
||||
double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissa(ps0), ps1);
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -30,8 +32,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63),
|
||||
b.PS1AsU64() ^ (UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -40,7 +43,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
{
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissa(b.PS0AsDouble()), b.PS1AsDouble());
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -51,8 +56,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63),
|
||||
b.PS1AsU64() | (UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -63,8 +69,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63),
|
||||
b.PS1AsU64() & ~(UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(ps0), ps1);
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -77,7 +84,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS0AsU64()), b.PS0AsU64());
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -89,7 +96,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS0AsU64()), b.PS1AsU64());
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -101,7 +108,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS1AsU64()), b.PS0AsU64());
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -113,7 +120,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(Core::RoundMantissaBits(a.PS1AsU64()), b.PS1AsU64());
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
@ -143,7 +150,12 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
// this code is based on the real hardware tests
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const double a = ppc_state.ps[inst.FB].PS0AsDouble();
|
||||
const double b = ppc_state.ps[inst.FB].PS1AsDouble();
|
||||
const double b = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
|
||||
|
||||
// The entire process of conditionally truncating the mantissa bits of the b register
|
||||
// isn't needed, because ps_res never reads those bottom mantissa bits anyways when
|
||||
// doing the operation on a standard input (i.e. neither NaN nor Infinity).
|
||||
// That is to say, the operation is 32-bit, compared to rsqrte which is 64-bit.
|
||||
|
||||
if (a == 0.0 || b == 0.0)
|
||||
{
|
||||
@ -154,11 +166,11 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
if (std::isnan(a) || std::isinf(a) || std::isnan(b) || std::isinf(b))
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (Common::IsSNAN(a) || Common::IsSNAN(b))
|
||||
if (Core::IsSNAN(a) || Core::IsSNAN(b))
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
const double ps0 = Common::ApproximateReciprocal(a);
|
||||
const double ps1 = Common::ApproximateReciprocal(b);
|
||||
const double ps0 = Core::TruncateMantissa(Core::ApproximateReciprocal(ppc_state.fpscr, a));
|
||||
const double ps1 = Core::ApproximateReciprocal(ppc_state.fpscr, b);
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
|
||||
ppc_state.UpdateFPRFSingle(float(ps0));
|
||||
@ -171,7 +183,15 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
{
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const double ps0 = ppc_state.ps[inst.FB].PS0AsDouble();
|
||||
const double ps1 = ppc_state.ps[inst.FB].PS1AsDouble();
|
||||
double ps1 = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
|
||||
|
||||
if (std::bit_cast<s64>(ps1) > 0)
|
||||
{
|
||||
// If ps1 is < 0.0, we want the result to remain < 0.0 even for
|
||||
// the smallest of subnormals which would otherwise be truncated to 0.0,
|
||||
// specifically so the proper exception is set
|
||||
ps1 = Core::TruncateMantissa(ps1);
|
||||
}
|
||||
|
||||
if (ps0 == 0.0 || ps1 == 0.0)
|
||||
{
|
||||
@ -188,11 +208,12 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
if (std::isnan(ps0) || std::isinf(ps0) || std::isnan(ps1) || std::isinf(ps1))
|
||||
ppc_state.fpscr.ClearFIFR();
|
||||
|
||||
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
|
||||
if (Core::IsSNAN(ps0) || Core::IsSNAN(ps1))
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0));
|
||||
const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1));
|
||||
// For some reason ps0 is also truncated for this operation rather than rounded
|
||||
const double dst_ps0 = Core::TruncateMantissa(Core::ApproximateReciprocalSquareRoot(ps0));
|
||||
const double dst_ps1 = Core::ApproximateReciprocalSquareRoot(ps1);
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
|
||||
ppc_state.UpdateFPRFSingle(dst_ps0);
|
||||
@ -355,7 +376,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
|
||||
const float ps0 =
|
||||
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
|
||||
const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble());
|
||||
const double ps1 = c.PS1AsDouble();
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
|
||||
ppc_state.UpdateFPRFSingle(ps0);
|
||||
@ -371,7 +392,8 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
const auto& c = ppc_state.ps[inst.FC];
|
||||
|
||||
const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble());
|
||||
// Rounds assuming ps0 is finite for some reason
|
||||
const double ps0 = Core::RoundMantissaFinite(c.PS0AsDouble());
|
||||
const float ps1 =
|
||||
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
|
||||
|
||||
|
||||
@ -8,9 +8,9 @@
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/Intrinsics.h"
|
||||
#include "Common/Swap.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/HW/MMIO.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
@ -899,13 +899,13 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
||||
MOVDDUP(dst, R(dst));
|
||||
}
|
||||
|
||||
alignas(16) static const u64 psDoubleExp[2] = {Common::DOUBLE_EXP, 0};
|
||||
alignas(16) static const u64 psDoubleFrac[2] = {Common::DOUBLE_FRAC, 0};
|
||||
alignas(16) static const u64 psDoubleNoSign[2] = {~Common::DOUBLE_SIGN, 0};
|
||||
alignas(16) static const u64 psDoubleExp[2] = {Core::DOUBLE_EXP, 0};
|
||||
alignas(16) static const u64 psDoubleFrac[2] = {Core::DOUBLE_FRAC, 0};
|
||||
alignas(16) static const u64 psDoubleNoSign[2] = {~Core::DOUBLE_SIGN, 0};
|
||||
|
||||
alignas(16) static const u32 psFloatExp[4] = {Common::FLOAT_EXP, 0, 0, 0};
|
||||
alignas(16) static const u32 psFloatFrac[4] = {Common::FLOAT_FRAC, 0, 0, 0};
|
||||
alignas(16) static const u32 psFloatNoSign[4] = {~Common::FLOAT_SIGN, 0, 0, 0};
|
||||
alignas(16) static const u32 psFloatExp[4] = {Core::FLOAT_EXP, 0, 0, 0};
|
||||
alignas(16) static const u32 psFloatFrac[4] = {Core::FLOAT_FRAC, 0, 0, 0};
|
||||
alignas(16) static const u32 psFloatNoSign[4] = {~Core::FLOAT_SIGN, 0, 0, 0};
|
||||
|
||||
// TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer,
|
||||
// storing the result of each floating point op and calculating it when needed. This is trickier
|
||||
@ -931,9 +931,9 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
|
||||
FixupBranch maxExponent = J_CC(CC_C);
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
// Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
|
||||
// Nice normalized number: sign ? Core::PPC_FPCLASS_NN : Core::PPC_FPCLASS_PN;
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_NN - Core::PPC_FPCLASS_PN, Core::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(maxExponent);
|
||||
@ -943,15 +943,14 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
|
||||
PTEST(xmm, MConst(psDoubleFrac));
|
||||
FixupBranch notNAN = J_CC(CC_Z);
|
||||
|
||||
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||
MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN));
|
||||
// Max exponent + mantissa: Core::PPC_FPCLASS_QNAN
|
||||
MOV(32, R(RSCRATCH), Imm32(Core::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
|
||||
// Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
|
||||
// Max exponent + no mantissa: sign ? Core::PPC_FPCLASS_NINF : Core::PPC_FPCLASS_PINF;
|
||||
SetJumpTarget(notNAN);
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF,
|
||||
Common::PPC_FPCLASS_PINF));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_NINF - Core::PPC_FPCLASS_PINF, Core::PPC_FPCLASS_PINF));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(zeroExponent);
|
||||
@ -961,29 +960,29 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
|
||||
PTEST(xmm, MConst(psDoubleNoSign));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
|
||||
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
|
||||
// No exponent + mantissa: sign ? Core::PPC_FPCLASS_ND : Core::PPC_FPCLASS_PD;
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_ND - Core::PPC_FPCLASS_PD, Core::PPC_FPCLASS_PD));
|
||||
continue4 = J();
|
||||
|
||||
// Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
|
||||
// Zero: sign ? Core::PPC_FPCLASS_NZ : Core::PPC_FPCLASS_PZ;
|
||||
SetJumpTarget(zero);
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ));
|
||||
ADD(32, R(RSCRATCH), Imm8(Core::PPC_FPCLASS_PZ));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
if (single)
|
||||
TEST(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
|
||||
TEST(32, R(RSCRATCH), Imm32(Core::FLOAT_EXP));
|
||||
else
|
||||
TEST(64, R(RSCRATCH), MConst(psDoubleExp));
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
if (single)
|
||||
{
|
||||
AND(32, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
|
||||
CMP(32, R(RSCRATCH), Imm32(Common::FLOAT_EXP));
|
||||
AND(32, R(RSCRATCH), Imm32(~Core::FLOAT_SIGN));
|
||||
CMP(32, R(RSCRATCH), Imm32(Core::FLOAT_EXP));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -997,37 +996,36 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm, bool single)
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_NN - Common::PPC_FPCLASS_PN, Common::PPC_FPCLASS_PN));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_NN - Core::PPC_FPCLASS_PN, Core::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(nan);
|
||||
MOV(32, R(RSCRATCH), Imm32(Common::PPC_FPCLASS_QNAN));
|
||||
MOV(32, R(RSCRATCH), Imm32(Core::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
|
||||
SetJumpTarget(infinity);
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_NINF - Common::PPC_FPCLASS_PINF,
|
||||
Common::PPC_FPCLASS_PINF));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_NINF - Core::PPC_FPCLASS_PINF, Core::PPC_FPCLASS_PINF));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(zeroExponent);
|
||||
if (single)
|
||||
TEST(input_size, R(RSCRATCH), Imm32(~Common::FLOAT_SIGN));
|
||||
TEST(input_size, R(RSCRATCH), Imm32(~Core::FLOAT_SIGN));
|
||||
else
|
||||
TEST(input_size, R(RSCRATCH), MConst(psDoubleNoSign));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
|
||||
SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
|
||||
LEA(32, RSCRATCH,
|
||||
MScaled(RSCRATCH, Common::PPC_FPCLASS_ND - Common::PPC_FPCLASS_PD, Common::PPC_FPCLASS_PD));
|
||||
MScaled(RSCRATCH, Core::PPC_FPCLASS_ND - Core::PPC_FPCLASS_PD, Core::PPC_FPCLASS_PD));
|
||||
continue4 = J();
|
||||
|
||||
SetJumpTarget(zero);
|
||||
SHR(input_size, R(RSCRATCH), Imm8(input_size - 1));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
ADD(32, R(RSCRATCH), Imm8(Common::PPC_FPCLASS_PZ));
|
||||
ADD(32, R(RSCRATCH), Imm8(Core::PPC_FPCLASS_PZ));
|
||||
}
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
|
||||
@ -8,11 +8,11 @@
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/EnumUtils.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/Intrinsics.h"
|
||||
#include "Common/JitRegister.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Constants.h"
|
||||
@ -142,20 +142,20 @@ void CommonAsmRoutines::GenFrsqrte()
|
||||
AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
|
||||
|
||||
PUSH(RSCRATCH2);
|
||||
MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(Common::frsqrte_expected)));
|
||||
static_assert(sizeof(Common::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(Core::frsqrte_expected)));
|
||||
static_assert(sizeof(Core::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
|
||||
SHR(64, R(RSCRATCH), Imm8(37));
|
||||
AND(32, R(RSCRATCH), Imm32(0x7FF));
|
||||
IMUL(32, RSCRATCH,
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Core::BaseAndDec, m_dec)));
|
||||
ADD(32, R(RSCRATCH),
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
|
||||
MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(Core::BaseAndDec, m_base)));
|
||||
SHL(64, R(RSCRATCH), Imm8(26));
|
||||
|
||||
POP(RSCRATCH2);
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH)); // vali |= (s64)(frsqrte_expected_base[index] +
|
||||
// frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH)); // vali |= (s64)(Core::frsqrte_expected_base[index] +
|
||||
// Core::frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
@ -207,7 +207,7 @@ void CommonAsmRoutines::GenFrsqrte()
|
||||
|
||||
SetJumpTarget(denormal);
|
||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
|
||||
ABI_CallFunction(Core::ApproximateReciprocalSquareRoot);
|
||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
RET();
|
||||
|
||||
@ -248,25 +248,23 @@ void CommonAsmRoutines::GenFres()
|
||||
AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
|
||||
|
||||
PUSH(RSCRATCH_EXTRA);
|
||||
MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(Common::fres_expected)));
|
||||
static_assert(sizeof(Common::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(Core::fres_expected)));
|
||||
static_assert(sizeof(Core::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size");
|
||||
|
||||
IMUL(32, RSCRATCH,
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
|
||||
ADD(32, R(RSCRATCH), Imm8(1));
|
||||
SHR(32, R(RSCRATCH), Imm8(1));
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Core::BaseAndDec, m_dec)));
|
||||
|
||||
MOV(32, R(RSCRATCH2),
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
|
||||
SUB(32, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH2), Imm8(29));
|
||||
ADD(32, R(RSCRATCH),
|
||||
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Core::BaseAndDec, m_base)));
|
||||
SHR(32, R(RSCRATCH), Imm8(1));
|
||||
SHL(64, R(RSCRATCH), Imm8(29));
|
||||
|
||||
POP(RSCRATCH_EXTRA);
|
||||
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] -
|
||||
// (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
|
||||
// << 29
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
OR(64, R(RSCRATCH),
|
||||
R(RSCRATCH_EXTRA)); // vali |= (s64)((u64)(Core::fres_expected_base[i / 1024] +
|
||||
// Core::fres_expected_dec[i / 1024] * (i % 1024)) / 2) << 29
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
RET();
|
||||
|
||||
// Exception flags for zero input.
|
||||
@ -278,7 +276,8 @@ void CommonAsmRoutines::GenFres()
|
||||
|
||||
SetJumpTarget(complex);
|
||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
ABI_CallFunction(Common::ApproximateReciprocal);
|
||||
LEA(64, ABI_PARAM1, PPCSTATE(fpscr));
|
||||
ABI_CallFunction(Core::ApproximateReciprocal);
|
||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
RET();
|
||||
|
||||
|
||||
@ -10,12 +10,12 @@
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Config/Config.h"
|
||||
#include "Common/EnumUtils.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/JitRegister.h"
|
||||
#include "Common/MathUtil.h"
|
||||
|
||||
#include "Core/Config/MainSettings.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/HW/CPU.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
@ -280,7 +280,7 @@ void JitArm64::GenerateFres()
|
||||
|
||||
UBFX(ARM64Reg::X2, ARM64Reg::X1, 52, 11); // Grab the exponent
|
||||
m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
|
||||
AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Common::DOUBLE_SIGN, GPRSize::B64));
|
||||
AND(ARM64Reg::X3, ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
|
||||
CMP(ARM64Reg::X2, 895);
|
||||
FixupBranch small_exponent = B(CCFlags::CC_LO);
|
||||
|
||||
@ -288,20 +288,18 @@ void JitArm64::GenerateFres()
|
||||
FixupBranch large_exponent = B(CCFlags::CC_HI);
|
||||
|
||||
UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa
|
||||
MOVP2R(ARM64Reg::X3, &Common::fres_expected);
|
||||
MOVP2R(ARM64Reg::X3, &Core::fres_expected);
|
||||
ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
|
||||
UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa
|
||||
LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
|
||||
MOVI2R(ARM64Reg::W4, 1);
|
||||
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
|
||||
SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
|
||||
AND(ARM64Reg::X0, ARM64Reg::X0,
|
||||
LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
|
||||
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2);
|
||||
AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
|
||||
LSR(ARM64Reg::W1, ARM64Reg::W1, 1);
|
||||
ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
|
||||
RET();
|
||||
|
||||
SetJumpTarget(small_exponent);
|
||||
TST(ARM64Reg::X1, LogicalImm(Common::DOUBLE_EXP | Common::DOUBLE_FRAC, GPRSize::B64));
|
||||
TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_EXP | Core::DOUBLE_FRAC, GPRSize::B64));
|
||||
FixupBranch zero = B(CCFlags::CC_EQ);
|
||||
MOVI2R(ARM64Reg::X4, std::bit_cast<u64>(static_cast<double>(std::numeric_limits<float>::max())));
|
||||
ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X4);
|
||||
@ -333,18 +331,17 @@ void JitArm64::GenerateFrsqrte()
|
||||
LSL(ARM64Reg::X2, ARM64Reg::X1, 1);
|
||||
m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
|
||||
CLS(ARM64Reg::X3, ARM64Reg::X2);
|
||||
TST(ARM64Reg::X1, LogicalImm(Common::DOUBLE_SIGN, GPRSize::B64));
|
||||
CCMP(ARM64Reg::X3, Common::DOUBLE_EXP_WIDTH - 1, 0b0010, CCFlags::CC_EQ);
|
||||
TST(ARM64Reg::X1, LogicalImm(Core::DOUBLE_SIGN, GPRSize::B64));
|
||||
CCMP(ARM64Reg::X3, Core::DOUBLE_EXP_WIDTH - 1, 0b0010, CCFlags::CC_EQ);
|
||||
FixupBranch not_positive_normal = B(CCFlags::CC_HS);
|
||||
|
||||
const u8* positive_normal = GetCodePtr();
|
||||
UBFX(ARM64Reg::X2, ARM64Reg::X1, 48, 5);
|
||||
MOVP2R(ARM64Reg::X3, &Common::frsqrte_expected);
|
||||
MOVP2R(ARM64Reg::X3, &Core::frsqrte_expected);
|
||||
ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
|
||||
LDP(IndexType::Signed, ARM64Reg::W3, ARM64Reg::W2, ARM64Reg::X2, 0);
|
||||
UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 11);
|
||||
AND(ARM64Reg::X0, ARM64Reg::X0,
|
||||
LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
|
||||
AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Core::DOUBLE_SIGN | Core::DOUBLE_EXP, GPRSize::B64));
|
||||
MADD(ARM64Reg::W1, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
||||
ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 26));
|
||||
RET();
|
||||
@ -463,8 +460,8 @@ void JitArm64::GenerateFPRF(bool single)
|
||||
constexpr ARM64Reg fpscr_reg = ARM64Reg::W4;
|
||||
|
||||
const int input_size = single ? 32 : 64;
|
||||
const int input_exp_size = single ? Common::FLOAT_EXP_WIDTH : Common::DOUBLE_EXP_WIDTH;
|
||||
const u64 input_frac_mask = single ? Common::FLOAT_FRAC : Common::DOUBLE_FRAC;
|
||||
const int input_exp_size = single ? Core::FLOAT_EXP_WIDTH : Core::DOUBLE_EXP_WIDTH;
|
||||
const u64 input_frac_mask = single ? Core::FLOAT_FRAC : Core::DOUBLE_FRAC;
|
||||
constexpr u32 output_sign_mask = 0xC;
|
||||
|
||||
// First of all, start the load of the old FPSCR value, in case it takes a while
|
||||
@ -475,8 +472,8 @@ void JitArm64::GenerateFPRF(bool single)
|
||||
FixupBranch not_zero = CBNZ(cls_reg);
|
||||
|
||||
// exp == 0 && frac == 0
|
||||
MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PZ);
|
||||
MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NZ);
|
||||
MOVI2R(ARM64Reg::W3, Core::PPC_FPCLASS_PZ);
|
||||
MOVI2R(ARM64Reg::W1, Core::PPC_FPCLASS_NZ);
|
||||
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);
|
||||
|
||||
const u8* write_fprf_and_ret = GetCodePtr();
|
||||
@ -490,8 +487,8 @@ void JitArm64::GenerateFPRF(bool single)
|
||||
|
||||
// All branches except the zero branch handle the sign in the same way.
|
||||
// Perform that handling before branching further
|
||||
MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PN);
|
||||
MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NN);
|
||||
MOVI2R(ARM64Reg::W3, Core::PPC_FPCLASS_PN);
|
||||
MOVI2R(ARM64Reg::W1, Core::PPC_FPCLASS_NN);
|
||||
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);
|
||||
|
||||
CMP(cls_reg, input_exp_size - 1);
|
||||
@ -501,14 +498,13 @@ void JitArm64::GenerateFPRF(bool single)
|
||||
FixupBranch nan_or_inf = TBNZ(input_reg, input_size - 2);
|
||||
|
||||
// exp == 0 && frac != 0
|
||||
ORR(fprf_reg, fprf_reg, LogicalImm(Common::PPC_FPCLASS_PD & ~output_sign_mask, GPRSize::B32));
|
||||
ORR(fprf_reg, fprf_reg, LogicalImm(Core::PPC_FPCLASS_PD & ~output_sign_mask, GPRSize::B32));
|
||||
B(write_fprf_and_ret);
|
||||
|
||||
// exp == EXP_MASK
|
||||
SetJumpTarget(nan_or_inf);
|
||||
MOVI2R(ARM64Reg::W2, Common::PPC_FPCLASS_QNAN);
|
||||
ORR(ARM64Reg::W1, fprf_reg,
|
||||
LogicalImm(Common::PPC_FPCLASS_PINF & ~output_sign_mask, GPRSize::B32));
|
||||
MOVI2R(ARM64Reg::W2, Core::PPC_FPCLASS_QNAN);
|
||||
ORR(ARM64Reg::W1, fprf_reg, LogicalImm(Core::PPC_FPCLASS_PINF & ~output_sign_mask, GPRSize::B32));
|
||||
TST(input_reg, LogicalImm(input_frac_mask, single ? GPRSize::B32 : GPRSize::B64));
|
||||
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W2, CCFlags::CC_EQ);
|
||||
B(write_fprf_and_ret);
|
||||
|
||||
@ -13,13 +13,13 @@
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FPURoundMode.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/Logging/Log.h"
|
||||
|
||||
#include "Core/CPUThreadConfigCallback.h"
|
||||
#include "Core/Config/MainSettings.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/HW/CPU.h"
|
||||
#include "Core/HW/SystemTimers.h"
|
||||
#include "Core/Host.h"
|
||||
@ -40,7 +40,25 @@ double PairedSingle::PS0AsDouble() const
|
||||
|
||||
double PairedSingle::PS1AsDouble() const
|
||||
{
|
||||
return std::bit_cast<double>(ps1);
|
||||
return Core::TruncateMantissa(std::bit_cast<double>(ps1));
|
||||
}
|
||||
|
||||
// If ps1 would get truncated to 0 if read as a raw value, set the sign
|
||||
// of the input for reciprocal operations
|
||||
// It's not exactly clear why this happens, but that's also why PS1 is
|
||||
// truncated on read rather than on write
|
||||
double PairedSingle::PS1AsReciprocalDouble() const
|
||||
{
|
||||
constexpr u64 trunc_bits = Core::DOUBLE_FRAC_WIDTH - Core::FLOAT_FRAC_WIDTH;
|
||||
constexpr u64 trunc_mask = (1 << trunc_bits) - 1;
|
||||
|
||||
u64 bits = ps1;
|
||||
if ((ps1 & ~(trunc_mask | Core::DOUBLE_SIGN)) == 0 && (ps1 & trunc_mask) != 0)
|
||||
{
|
||||
bits |= Core::DOUBLE_SIGN;
|
||||
}
|
||||
|
||||
return std::bit_cast<double>(bits);
|
||||
}
|
||||
|
||||
void PairedSingle::SetPS0(double value)
|
||||
@ -682,12 +700,12 @@ void PowerPCState::SetSR(u32 index, u32 value)
|
||||
|
||||
void PowerPCState::UpdateFPRFDouble(double dvalue)
|
||||
{
|
||||
fpscr.FPRF = Common::ClassifyDouble(dvalue);
|
||||
fpscr.FPRF = Core::ClassifyDouble(dvalue);
|
||||
}
|
||||
|
||||
void PowerPCState::UpdateFPRFSingle(float fvalue)
|
||||
{
|
||||
fpscr.FPRF = Common::ClassifyFloat(fvalue);
|
||||
fpscr.FPRF = Core::ClassifyFloat(fvalue);
|
||||
}
|
||||
|
||||
void RoundingModeUpdated(PowerPCState& ppc_state)
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#include "Core/CPUThreadConfigCallback.h"
|
||||
#include "Core/Debugger/BranchWatch.h"
|
||||
#include "Core/Debugger/PPCDebugInterface.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/BreakPoints.h"
|
||||
#include "Core/PowerPC/ConditionRegister.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
@ -70,14 +71,21 @@ struct TLBEntry
|
||||
|
||||
struct PairedSingle
|
||||
{
|
||||
// By default, truncate PS1
|
||||
// Due to reciprocal operations having a quirk in which the sign
|
||||
// of the input PS1 is set if the value in it beforehand would
|
||||
// be truncated to 0, setting PS1 then only truncating it on read
|
||||
// operations simply works easier than creating an entire flag
|
||||
// for this specific case
|
||||
u64 PS0AsU64() const { return ps0; }
|
||||
u64 PS1AsU64() const { return ps1; }
|
||||
u64 PS1AsU64() const { return Core::TruncateMantissaBits(ps1); }
|
||||
|
||||
u32 PS0AsU32() const { return static_cast<u32>(ps0); }
|
||||
u32 PS1AsU32() const { return static_cast<u32>(ps1); }
|
||||
|
||||
double PS0AsDouble() const;
|
||||
double PS1AsDouble() const;
|
||||
double PS1AsReciprocalDouble() const;
|
||||
|
||||
void SetPS0(u64 value) { ps0 = value; }
|
||||
void SetPS0(double value);
|
||||
|
||||
@ -64,7 +64,6 @@
|
||||
<ClInclude Include="Common\FileUtil.h" />
|
||||
<ClInclude Include="Common\FixedSizeQueue.h" />
|
||||
<ClInclude Include="Common\Flag.h" />
|
||||
<ClInclude Include="Common\FloatUtils.h" />
|
||||
<ClInclude Include="Common\FormatUtil.h" />
|
||||
<ClInclude Include="Common\FPURoundMode.h" />
|
||||
<ClInclude Include="Common\Functional.h" />
|
||||
@ -251,6 +250,7 @@
|
||||
<ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
|
||||
<ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
|
||||
<ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
|
||||
<ClInclude Include="Core\FloatUtils.h" />
|
||||
<ClInclude Include="Core\FreeLookConfig.h" />
|
||||
<ClInclude Include="Core\FreeLookManager.h" />
|
||||
<ClInclude Include="Core\GeckoCode.h" />
|
||||
@ -831,7 +831,6 @@
|
||||
<ClCompile Include="Common\FileSearch.cpp" />
|
||||
<ClCompile Include="Common\FilesystemWatcher.cpp" />
|
||||
<ClCompile Include="Common\FileUtil.cpp" />
|
||||
<ClCompile Include="Common\FloatUtils.cpp" />
|
||||
<ClCompile Include="Common\GekkoDisassembler.cpp" />
|
||||
<ClCompile Include="Common\GL\GLContext.cpp" />
|
||||
<ClCompile Include="Common\GL\GLExtensions\GLExtensions.cpp" />
|
||||
@ -936,6 +935,7 @@
|
||||
<ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
|
||||
<ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
|
||||
<ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
|
||||
<ClCompile Include="Core\FloatUtils.cpp" />
|
||||
<ClCompile Include="Core\FreeLookConfig.cpp" />
|
||||
<ClCompile Include="Core\FreeLookManager.cpp" />
|
||||
<ClCompile Include="Core\GeckoCode.cpp" />
|
||||
|
||||
@ -23,10 +23,10 @@
|
||||
|
||||
#include "Common/Align.h"
|
||||
#include "Common/BitUtils.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/StringUtil.h"
|
||||
#include "Common/Swap.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/HW/AddressSpace.h"
|
||||
#include "Core/PowerPC/BreakPoints.h"
|
||||
#include "Core/PowerPC/PPCSymbolDB.h"
|
||||
|
||||
@ -12,7 +12,6 @@ add_dolphin_test(EventTest EventTest.cpp)
|
||||
add_dolphin_test(FileUtilTest FileUtilTest.cpp)
|
||||
add_dolphin_test(FixedSizeQueueTest FixedSizeQueueTest.cpp)
|
||||
add_dolphin_test(FlagTest FlagTest.cpp)
|
||||
add_dolphin_test(FloatUtilsTest FloatUtilsTest.cpp)
|
||||
add_dolphin_test(MathUtilTest MathUtilTest.cpp)
|
||||
add_dolphin_test(MutexTest MutexTest.cpp)
|
||||
add_dolphin_test(NandPathsTest NandPathsTest.cpp)
|
||||
|
||||
@ -2,6 +2,7 @@ add_dolphin_test(MMIOTest MMIOTest.cpp)
|
||||
add_dolphin_test(PageFaultTest PageFaultTest.cpp)
|
||||
add_dolphin_test(CoreTimingTest CoreTimingTest.cpp)
|
||||
add_dolphin_test(PatchAllowlistTest PatchAllowlistTest.cpp)
|
||||
add_dolphin_test(FloatUtilsTest FloatUtilsTest.cpp)
|
||||
|
||||
add_dolphin_test(DSPAcceleratorTest DSP/DSPAcceleratorTest.cpp)
|
||||
add_dolphin_test(DSPAssemblyTest
|
||||
|
||||
@ -8,20 +8,20 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
|
||||
#include "../Core/PowerPC/TestValues.h"
|
||||
|
||||
TEST(FloatUtils, IsQNAN)
|
||||
{
|
||||
EXPECT_TRUE(Common::IsQNAN(std::numeric_limits<double>::quiet_NaN()));
|
||||
EXPECT_FALSE(Common::IsQNAN(Common::SNANConstant<double>()));
|
||||
EXPECT_TRUE(Core::IsQNAN(std::numeric_limits<double>::quiet_NaN()));
|
||||
EXPECT_FALSE(Core::IsQNAN(Core::SNANConstant<double>()));
|
||||
}
|
||||
|
||||
TEST(FloatUtils, IsSNAN)
|
||||
{
|
||||
EXPECT_FALSE(Common::IsSNAN(std::numeric_limits<double>::quiet_NaN()));
|
||||
EXPECT_TRUE(Common::IsSNAN(Common::SNANConstant<double>()));
|
||||
EXPECT_FALSE(Core::IsSNAN(std::numeric_limits<double>::quiet_NaN()));
|
||||
EXPECT_TRUE(Core::IsSNAN(Core::SNANConstant<double>()));
|
||||
}
|
||||
|
||||
TEST(FloatUtils, FlushToZero)
|
||||
@ -34,18 +34,18 @@ TEST(FloatUtils, FlushToZero)
|
||||
EXPECT_LT(0.f, s * 2);
|
||||
EXPECT_LT(0.0, d * 2);
|
||||
|
||||
EXPECT_EQ(+0.0, Common::FlushToZero(+std::numeric_limits<double>::denorm_min()));
|
||||
EXPECT_EQ(-0.0, Common::FlushToZero(-std::numeric_limits<double>::denorm_min()));
|
||||
EXPECT_EQ(+0.0, Common::FlushToZero(+std::numeric_limits<double>::min() / 2));
|
||||
EXPECT_EQ(-0.0, Common::FlushToZero(-std::numeric_limits<double>::min() / 2));
|
||||
EXPECT_EQ(+0.0, Core::FlushToZero(+std::numeric_limits<double>::denorm_min()));
|
||||
EXPECT_EQ(-0.0, Core::FlushToZero(-std::numeric_limits<double>::denorm_min()));
|
||||
EXPECT_EQ(+0.0, Core::FlushToZero(+std::numeric_limits<double>::min() / 2));
|
||||
EXPECT_EQ(-0.0, Core::FlushToZero(-std::numeric_limits<double>::min() / 2));
|
||||
EXPECT_EQ(std::numeric_limits<double>::min(),
|
||||
Common::FlushToZero(std::numeric_limits<double>::min()));
|
||||
Core::FlushToZero(std::numeric_limits<double>::min()));
|
||||
EXPECT_EQ(std::numeric_limits<double>::max(),
|
||||
Common::FlushToZero(std::numeric_limits<double>::max()));
|
||||
Core::FlushToZero(std::numeric_limits<double>::max()));
|
||||
EXPECT_EQ(+std::numeric_limits<double>::infinity(),
|
||||
Common::FlushToZero(+std::numeric_limits<double>::infinity()));
|
||||
Core::FlushToZero(+std::numeric_limits<double>::infinity()));
|
||||
EXPECT_EQ(-std::numeric_limits<double>::infinity(),
|
||||
Common::FlushToZero(-std::numeric_limits<double>::infinity()));
|
||||
Core::FlushToZero(-std::numeric_limits<double>::infinity()));
|
||||
|
||||
// Test all subnormals as well as an equally large set of random normal floats.
|
||||
std::default_random_engine engine(0);
|
||||
@ -53,16 +53,16 @@ TEST(FloatUtils, FlushToZero)
|
||||
for (u32 i = 0; i <= 0x007fffffu; ++i)
|
||||
{
|
||||
u32 i_tmp = i;
|
||||
EXPECT_EQ(+0.f, Common::FlushToZero(std::bit_cast<float>(i_tmp)));
|
||||
EXPECT_EQ(+0.f, Core::FlushToZero(std::bit_cast<float>(i_tmp)));
|
||||
|
||||
i_tmp |= 0x80000000u;
|
||||
EXPECT_EQ(-0.f, Common::FlushToZero(std::bit_cast<float>(i_tmp)));
|
||||
EXPECT_EQ(-0.f, Core::FlushToZero(std::bit_cast<float>(i_tmp)));
|
||||
|
||||
i_tmp = dist(engine);
|
||||
EXPECT_EQ(i_tmp, std::bit_cast<u32>(Common::FlushToZero(std::bit_cast<float>(i_tmp))));
|
||||
EXPECT_EQ(i_tmp, std::bit_cast<u32>(Core::FlushToZero(std::bit_cast<float>(i_tmp))));
|
||||
|
||||
i_tmp |= 0x80000000u;
|
||||
EXPECT_EQ(i_tmp, std::bit_cast<u32>(Common::FlushToZero(std::bit_cast<float>(i_tmp))));
|
||||
EXPECT_EQ(i_tmp, std::bit_cast<u32>(Core::FlushToZero(std::bit_cast<float>(i_tmp))));
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,7 +92,7 @@ TEST(FloatUtils, ApproximateReciprocalSquareRoot)
|
||||
|
||||
u64 expected = expected_values[i];
|
||||
|
||||
u64 actual = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
|
||||
u64 actual = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
|
||||
|
||||
EXPECT_EQ(expected, actual);
|
||||
}
|
||||
@ -5,10 +5,10 @@
|
||||
#include <cstring>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FloatUtils.h"
|
||||
#include "Common/ScopeGuard.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/FloatUtils.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
|
||||
@ -72,7 +72,7 @@ TEST(Jit64, Frsqrte)
|
||||
{
|
||||
const double dvalue = std::bit_cast<double>(ivalue);
|
||||
|
||||
u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
|
||||
u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
|
||||
|
||||
u64 actual = routines.wrapped_frsqrte(ivalue, fpscr);
|
||||
|
||||
|
||||
@ -58,11 +58,14 @@ TEST(JitArm64, Fres)
|
||||
|
||||
TestFres test(Core::System::GetInstance());
|
||||
|
||||
// FPSCR with NI set
|
||||
const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
|
||||
|
||||
for (const u64 ivalue : double_test_values)
|
||||
{
|
||||
const double dvalue = std::bit_cast<double>(ivalue);
|
||||
|
||||
const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
|
||||
const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocal(fpscr, dvalue));
|
||||
const u64 actual = test.fres(ivalue);
|
||||
|
||||
if (expected != actual)
|
||||
|
||||
@ -63,7 +63,7 @@ TEST(JitArm64, Frsqrte)
|
||||
{
|
||||
const double dvalue = std::bit_cast<double>(ivalue);
|
||||
|
||||
const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
|
||||
const u64 expected = std::bit_cast<u64>(Core::ApproximateReciprocalSquareRoot(dvalue));
|
||||
const u64 actual = test.frsqrte(ivalue);
|
||||
|
||||
if (expected != actual)
|
||||
|
||||
@ -51,7 +51,6 @@
|
||||
<ClCompile Include="Common\FileUtilTest.cpp" />
|
||||
<ClCompile Include="Common\FixedSizeQueueTest.cpp" />
|
||||
<ClCompile Include="Common\FlagTest.cpp" />
|
||||
<ClCompile Include="Common\FloatUtilsTest.cpp" />
|
||||
<ClCompile Include="Common\MathUtilTest.cpp" />
|
||||
<ClCompile Include="Common\MutexTest.cpp" />
|
||||
<ClCompile Include="Common\NandPathsTest.cpp" />
|
||||
@ -67,6 +66,7 @@
|
||||
<ClCompile Include="Core\DSP\DSPTestText.cpp" />
|
||||
<ClCompile Include="Core\DSP\HermesBinary.cpp" />
|
||||
<ClCompile Include="Core\DSP\HermesText.cpp" />
|
||||
<ClCompile Include="Core\FloatUtilsTest.cpp" />
|
||||
<ClCompile Include="Core\IOS\ES\FormatsTest.cpp" />
|
||||
<ClCompile Include="Core\IOS\FS\FileSystemTest.cpp" />
|
||||
<ClCompile Include="Core\IOS\USB\SkylandersTest.cpp" />
|
||||
|
||||
Loading…
Reference in New Issue
Block a user