mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
iR5900: Faster FTOI
This commit is contained in:
parent
5bc2342d47
commit
c72e894fc7
@ -249,7 +249,6 @@ void CTC1() {
|
||||
|
||||
void CVT_S() {
|
||||
_FdValf_ = (float)_FsValSl_;
|
||||
_FdValf_ = fpuDouble( _FdValUl_ );
|
||||
}
|
||||
|
||||
void CVT_W() {
|
||||
|
||||
@ -26,8 +26,6 @@ namespace DOUBLE
|
||||
void recC_EQ_xmm(int info);
|
||||
void recC_LE_xmm(int info);
|
||||
void recC_LT_xmm(int info);
|
||||
void recCVT_S_xmm(int info);
|
||||
void recCVT_W();
|
||||
void recDIV_S_xmm(int info);
|
||||
void recMADD_S_xmm(int info);
|
||||
void recMADDA_S_xmm(int info);
|
||||
@ -993,15 +991,16 @@ void recCVT_S_xmm(int info)
|
||||
}
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
||||
void recCVT_S()
|
||||
{
|
||||
// Float version is fully accurate, no double version
|
||||
eeFPURecompileCode(recCVT_S_xmm, R5900::Interpreter::OpcodeImpl::COP1::CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
||||
}
|
||||
|
||||
void recCVT_W()
|
||||
{
|
||||
if (CHECK_FPU_FULL)
|
||||
{
|
||||
DOUBLE::recCVT_W();
|
||||
return;
|
||||
}
|
||||
// Float version is fully accurate, no double version
|
||||
|
||||
// If we have the following EmitOP() on the top then it'll get calculated twice when CHECK_FPU_FULL is true
|
||||
// as we also have an EmitOP() at recCVT_W() on iFPUd.cpp. hence we have it below the possible return.
|
||||
EE::Profiler.EmitOp(eeOpcode::CVTW);
|
||||
@ -1010,26 +1009,23 @@ void recCVT_W()
|
||||
|
||||
if (regs >= 0)
|
||||
{
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW)
|
||||
fpuFloat2(regs);
|
||||
xCVTTSS2SI(eax, xRegisterSSE(regs));
|
||||
xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs
|
||||
xAND(edx, 1); // keep only LSB
|
||||
xMOVD(edx, xRegisterSSE(regs));
|
||||
}
|
||||
else
|
||||
{
|
||||
xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xSHR(edx, 31); // mov sign to lsb
|
||||
}
|
||||
|
||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
||||
|
||||
xCMP(eax, 0x80000000); // If the result is the integer indefinite value
|
||||
xCMOVE(eax, edx); // Saturate it
|
||||
// cvttss2si converts unrepresentable values to 0x80000000, so negative values are already handled.
|
||||
// So we just need to handle positive values.
|
||||
xCMP(edx, 0x4f000000); // If the input is greater than INT_MAX
|
||||
xMOV(edx, 0x7fffffff);
|
||||
xCMOVGE(eax, edx); // Saturate it
|
||||
|
||||
//Write the result
|
||||
xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
|
||||
|
||||
@ -540,57 +540,10 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
|
||||
//------------------------------------------------------------------
|
||||
// CVT.x XMM
|
||||
//------------------------------------------------------------------
|
||||
void recCVT_S_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CVTS_F);
|
||||
|
||||
if (info & PROCESS_EE_D)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else
|
||||
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const int temp = _allocTempXMMreg(XMMT_FPS);
|
||||
xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
|
||||
_freeXMMreg(temp);
|
||||
}
|
||||
}
|
||||
// CVT.S: Identical to non-double variant, omitted
|
||||
// CVT.W: Identical to non-double variant, omitted
|
||||
|
||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
||||
|
||||
void recCVT_W() //called from iFPU.cpp's recCVT_W
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CVTW);
|
||||
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
||||
|
||||
if (regs >= 0)
|
||||
{
|
||||
xCVTTSS2SI(eax, xRegisterSSE(regs));
|
||||
xMOVMSKPS(edx, xRegisterSSE(regs)); // extract the signs
|
||||
xAND(edx, 1); // keep only LSB
|
||||
}
|
||||
else
|
||||
{
|
||||
xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xSHR(edx, 31); //mov sign to lsb
|
||||
}
|
||||
|
||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
||||
|
||||
xCMP(eax, 0x80000000); // If the result is the integer indefinite value
|
||||
xCMOVE(eax, edx); // Saturate it
|
||||
|
||||
//Write the result
|
||||
xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
|
||||
}
|
||||
//------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@ -42,6 +42,7 @@ struct mVU_Globals
|
||||
u32 E4 [4] = __four(0x3933e553);
|
||||
u32 E5 [4] = __four(0x36b63510);
|
||||
u32 E6 [4] = __four(0x353961ac);
|
||||
u32 I32MAXF [4] = __four(0x4effffff);
|
||||
float FTOI_4 [4] = __four(16.0);
|
||||
float FTOI_12 [4] = __four(4096.0);
|
||||
float FTOI_15 [4] = __four(32768.0);
|
||||
|
||||
@ -484,23 +484,19 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum)
|
||||
return;
|
||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||
const xmm& t1 = mVU.regAlloc->allocReg();
|
||||
const xmm& t2 = mVU.regAlloc->allocReg();
|
||||
|
||||
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
||||
xMOVAPS(t1, Fs);
|
||||
// cvttps2dq returns 0x80000000 for any unrepresentable value.
|
||||
// We want it to return 0x80000000 for negative and 0x7fffffff for positive.
|
||||
// So for unrepresentable positive values, xor with 0xffffffff to turn 0x80000000 into 0x7fffffff.
|
||||
if (addr)
|
||||
xMUL.PS(Fs, ptr128[addr]);
|
||||
xMOVAPS(t1, Fs);
|
||||
xPCMP.GTD(t1, ptr128[mVUglob.I32MAXF]);
|
||||
xCVTTPS2DQ(Fs, Fs);
|
||||
xPXOR(t1, ptr128[mVUglob.signbit]);
|
||||
xPSRA.D(t1, 31);
|
||||
xMOVAPS(t2, Fs);
|
||||
xPCMP.EQD(t2, ptr128[mVUglob.signbit]);
|
||||
xAND.PS(t1, t2);
|
||||
xPADD.D(Fs, t1);
|
||||
xPXOR(Fs, t1);
|
||||
|
||||
mVU.regAlloc->clearNeeded(Fs);
|
||||
mVU.regAlloc->clearNeeded(t1);
|
||||
mVU.regAlloc->clearNeeded(t2);
|
||||
mVU.profiler.EmitOp(opEnum);
|
||||
}
|
||||
pass3
|
||||
|
||||
Loading…
Reference in New Issue
Block a user